radv: add support for caching PS epilogs
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 14 Mar 2023 11:57:35 +0000 (12:57 +0100)
committerMarge Bot <emma+marge@anholt.net>
Tue, 21 Mar 2023 13:53:59 +0000 (13:53 +0000)
For PS epilogs created at link time because libraries are still not
cached.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21897>

src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_pipeline_cache.c
src/amd/vulkan/radv_pipeline_rt.c
src/amd/vulkan/radv_private.h

index 8616a13..3e089f2 100644 (file)
@@ -3415,10 +3415,6 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
       if (found_in_application_cache)
          pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
 
-      /* TODO: Add PS epilogs to the cache. */
-      if (!radv_pipeline_create_ps_epilog(pipeline, pipeline_key, lib_flags, noop_fs, NULL))
-         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
-
       result = VK_SUCCESS;
       goto done;
    }
@@ -3540,7 +3536,8 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
          pipeline->base.shaders[MESA_SHADER_COMPUTE] = pipeline->base.gs_copy_shader;
       }
 
-      radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries, NULL, 0);
+      radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries,
+                                         ps_epilog_binary, NULL, 0);
 
       if (pipeline->base.gs_copy_shader) {
          pipeline->base.gs_copy_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
@@ -5408,7 +5405,7 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline,
    }
 
    if (!keep_executable_info) {
-      radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries, NULL, 0);
+      radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries, NULL, NULL, 0);
    }
 
    free(binaries[MESA_SHADER_COMPUTE]);
index c52a34e..e4c58d4 100644 (file)
@@ -41,6 +41,8 @@ struct cache_entry {
    uint32_t binary_sizes[MESA_VULKAN_SHADER_STAGES];
    uint32_t num_stack_sizes;
    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
+   uint32_t ps_epilog_binary_size;
+   struct radv_shader_part *ps_epilog;
    char code[0];
 };
 
@@ -120,6 +122,8 @@ entry_size(const struct cache_entry *entry)
    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
       if (entry->binary_sizes[i])
          ret += entry->binary_sizes[i];
+   if (entry->ps_epilog_binary_size)
+      ret += entry->ps_epilog_binary_size;
    ret += sizeof(struct radv_pipeline_shader_stack_size) * entry->num_stack_sizes;
    ret = align(ret, alignof(struct cache_entry));
    return ret;
@@ -387,6 +391,23 @@ radv_create_shaders_from_pipeline_cache(struct radv_device *device,
       pipeline->shaders[MESA_SHADER_COMPUTE] = NULL;
    }
 
+   if (!entry->ps_epilog && entry->ps_epilog_binary_size) {
+      struct radv_shader_part_binary *binary = calloc(1, entry->ps_epilog_binary_size);
+      memcpy(binary, p, entry->ps_epilog_binary_size);
+      p += entry->ps_epilog_binary_size;
+
+      entry->ps_epilog = radv_shader_part_create(device, binary,
+                                                 device->physical_device->ps_wave_size);
+
+      free(binary);
+   }
+
+   if (entry->ps_epilog) {
+      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
+
+      graphics_pipeline->ps_epilog = entry->ps_epilog;
+   }
+
    assert(num_rt_groups == entry->num_stack_sizes);
    for (int i = 0; i < num_rt_groups; ++i) {
       memcpy(&rt_groups[i].stack_size, p, sizeof(struct radv_pipeline_shader_stack_size));
@@ -399,6 +420,9 @@ radv_create_shaders_from_pipeline_cache(struct radv_device *device,
       for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
          if (entry->shaders[i])
             radv_shader_ref(entry->shaders[i]);
+
+      if (entry->ps_epilog)
+         radv_shader_part_ref(entry->ps_epilog);
    }
 
    assert((uintptr_t)p <= (uintptr_t)entry + entry_size(entry));
@@ -410,6 +434,7 @@ void
 radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
                                    const unsigned char *sha1, struct radv_pipeline *pipeline,
                                    struct radv_shader_binary *const *binaries,
+                                   struct radv_shader_part_binary *ps_epilog_binary,
                                    const struct radv_ray_tracing_module *rt_groups,
                                    uint32_t num_rt_groups)
 {
@@ -429,6 +454,15 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
          radv_shader_ref(pipeline->shaders[i]);
       }
 
+      if (entry->ps_epilog) {
+         struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
+
+         radv_shader_part_unref(cache->device, graphics_pipeline->ps_epilog);
+
+         graphics_pipeline->ps_epilog = entry->ps_epilog;
+         radv_shader_part_ref(graphics_pipeline->ps_epilog);
+      }
+
       radv_pipeline_cache_unlock(cache);
       return;
    }
@@ -445,6 +479,8 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
       if (pipeline->shaders[i])
          size += binaries[i]->total_size;
+   if (ps_epilog_binary)
+      size += ps_epilog_binary->total_size;
    const size_t size_without_align = size;
    size = align(size_without_align, alignof(struct cache_entry));
 
@@ -469,6 +505,12 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
       p += binaries[i]->total_size;
    }
 
+   if (ps_epilog_binary) {
+      entry->ps_epilog_binary_size = ps_epilog_binary->total_size;
+      memcpy(p, ps_epilog_binary, ps_epilog_binary->total_size);
+      p += ps_epilog_binary->total_size;
+   }
+
    for (int i = 0; i < num_rt_groups; ++i) {
       memcpy(p, &rt_groups[i].stack_size, sizeof(struct radv_pipeline_shader_stack_size));
       p += sizeof(struct radv_pipeline_shader_stack_size);
@@ -511,6 +553,13 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
       radv_shader_ref(pipeline->shaders[i]);
    }
 
+   if (ps_epilog_binary) {
+      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
+
+      entry->ps_epilog = graphics_pipeline->ps_epilog;
+      radv_shader_part_ref(graphics_pipeline->ps_epilog);
+   }
+
    radv_pipeline_cache_add_entry(cache, entry);
 
    radv_pipeline_cache_unlock(cache);
index 977d241..9a3dcc7 100644 (file)
@@ -305,7 +305,7 @@ radv_rt_pipeline_compile(struct radv_pipeline *pipeline,
    }
 
    if (!keep_executable_info) {
-      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline, binaries, rt_groups,
+      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline, binaries, NULL, rt_groups,
                                          num_rt_groups);
    }
 
index 074110f..059165b 100644 (file)
@@ -440,10 +440,13 @@ bool radv_create_shaders_from_pipeline_cache(
    struct radv_pipeline *pipeline, struct radv_ray_tracing_module *rt_groups,
    uint32_t num_rt_groups, bool *found_in_application_cache);
 
+struct radv_shader_binary_part;
+
 void radv_pipeline_cache_insert_shaders(struct radv_device *device,
                                         struct radv_pipeline_cache *cache,
                                         const unsigned char *sha1, struct radv_pipeline *pipeline,
                                         struct radv_shader_binary *const *binaries,
+                                        struct radv_shader_part_binary *ps_epilog_binary,
                                         const struct radv_ray_tracing_module *rt_groups,
                                         uint32_t num_rt_groups);