From 5082b6b034f457a925e585717adc20bfc8cf86e1 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Tue, 14 Mar 2023 12:57:35 +0100
Subject: [PATCH] radv: add support for caching PS epilogs

This is for PS epilogs created at link time, because pipeline libraries
are still not cached.

Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/radv_pipeline.c       |  9 +++----
 src/amd/vulkan/radv_pipeline_cache.c | 49 ++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_pipeline_rt.c    |  2 +-
 src/amd/vulkan/radv_private.h        |  3 +++
 4 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 8616a13..3e089f2 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3415,10 +3415,6 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
       if (found_in_application_cache)
          pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
 
-      /* TODO: Add PS epilogs to the cache. */
-      if (!radv_pipeline_create_ps_epilog(pipeline, pipeline_key, lib_flags, noop_fs, NULL))
-         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
-
       result = VK_SUCCESS;
       goto done;
    }
@@ -3540,7 +3536,8 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
       pipeline->base.shaders[MESA_SHADER_COMPUTE] = pipeline->base.gs_copy_shader;
    }
 
-   radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries, NULL, 0);
+   radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries,
+                                      ps_epilog_binary, NULL, 0);
 
    if (pipeline->base.gs_copy_shader) {
       pipeline->base.gs_copy_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
@@ -5408,7 +5405,7 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline,
    }
 
    if (!keep_executable_info) {
-      radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries, NULL, 0);
+      radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries, NULL, NULL, 0);
    }
 
    free(binaries[MESA_SHADER_COMPUTE]);
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index c52a34e..e4c58d4 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -41,6 +41,8 @@ struct cache_entry {
    uint32_t binary_sizes[MESA_VULKAN_SHADER_STAGES];
    uint32_t num_stack_sizes;
    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
+   uint32_t ps_epilog_binary_size;
+   struct radv_shader_part *ps_epilog;
    char code[0];
 };
 
@@ -120,6 +122,8 @@ entry_size(const struct cache_entry *entry)
    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
       if (entry->binary_sizes[i])
         ret += entry->binary_sizes[i];
+   if (entry->ps_epilog_binary_size)
+      ret += entry->ps_epilog_binary_size;
    ret += sizeof(struct radv_pipeline_shader_stack_size) * entry->num_stack_sizes;
    ret = align(ret, alignof(struct cache_entry));
    return ret;
@@ -387,6 +391,23 @@ radv_create_shaders_from_pipeline_cache(struct radv_device *device,
       pipeline->shaders[MESA_SHADER_COMPUTE] = NULL;
    }
 
+   if (!entry->ps_epilog && entry->ps_epilog_binary_size) {
+      struct radv_shader_part_binary *binary = calloc(1, entry->ps_epilog_binary_size);
+      memcpy(binary, p, entry->ps_epilog_binary_size);
+      p += entry->ps_epilog_binary_size;
+
+      entry->ps_epilog = radv_shader_part_create(device, binary,
+                                                 device->physical_device->ps_wave_size);
+
+      free(binary);
+   }
+
+   if (entry->ps_epilog) {
+      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
+
+      graphics_pipeline->ps_epilog = entry->ps_epilog;
+   }
+
    assert(num_rt_groups == entry->num_stack_sizes);
    for (int i = 0; i < num_rt_groups; ++i) {
       memcpy(&rt_groups[i].stack_size, p, sizeof(struct radv_pipeline_shader_stack_size));
@@ -399,6 +420,9 @@ radv_create_shaders_from_pipeline_cache(struct radv_device *device,
       for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
          if (entry->shaders[i])
            radv_shader_ref(entry->shaders[i]);
+
+      if (entry->ps_epilog)
+         radv_shader_part_ref(entry->ps_epilog);
    }
 
    assert((uintptr_t)p <= (uintptr_t)entry + entry_size(entry));
@@ -410,6 +434,7 @@ void
 radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
                                    const unsigned char *sha1, struct radv_pipeline *pipeline,
                                    struct radv_shader_binary *const *binaries,
+                                   struct radv_shader_part_binary *ps_epilog_binary,
                                    const struct radv_ray_tracing_module *rt_groups,
                                    uint32_t num_rt_groups)
 {
@@ -429,6 +454,15 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
          radv_shader_ref(pipeline->shaders[i]);
       }
 
+      if (entry->ps_epilog) {
+         struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
+
+         radv_shader_part_unref(cache->device, graphics_pipeline->ps_epilog);
+
+         graphics_pipeline->ps_epilog = entry->ps_epilog;
+         radv_shader_part_ref(graphics_pipeline->ps_epilog);
+      }
+
       radv_pipeline_cache_unlock(cache);
       return;
    }
@@ -445,6 +479,8 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
       if (pipeline->shaders[i])
         size += binaries[i]->total_size;
+   if (ps_epilog_binary)
+      size += ps_epilog_binary->total_size;
    const size_t size_without_align = size;
    size = align(size_without_align, alignof(struct cache_entry));
 
@@ -469,6 +505,12 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
       p += binaries[i]->total_size;
    }
 
+   if (ps_epilog_binary) {
+      entry->ps_epilog_binary_size = ps_epilog_binary->total_size;
+      memcpy(p, ps_epilog_binary, ps_epilog_binary->total_size);
+      p += ps_epilog_binary->total_size;
+   }
+
    for (int i = 0; i < num_rt_groups; ++i) {
       memcpy(p, &rt_groups[i].stack_size, sizeof(struct radv_pipeline_shader_stack_size));
       p += sizeof(struct radv_pipeline_shader_stack_size);
@@ -511,6 +553,13 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
       radv_shader_ref(pipeline->shaders[i]);
    }
 
+   if (ps_epilog_binary) {
+      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
+
+      entry->ps_epilog = graphics_pipeline->ps_epilog;
+      radv_shader_part_ref(graphics_pipeline->ps_epilog);
+   }
+
    radv_pipeline_cache_add_entry(cache, entry);
 
    radv_pipeline_cache_unlock(cache);
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index 977d241..9a3dcc7 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -305,7 +305,7 @@ radv_rt_pipeline_compile(struct radv_pipeline *pipeline,
    }
 
    if (!keep_executable_info) {
-      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline, binaries, rt_groups,
+      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline, binaries, NULL, rt_groups,
                                          num_rt_groups);
    }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 074110f..059165b 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -440,10 +440,13 @@ bool radv_create_shaders_from_pipeline_cache(
    struct radv_pipeline *pipeline, struct radv_ray_tracing_module *rt_groups,
    uint32_t num_rt_groups, bool *found_in_application_cache);
 
+struct radv_shader_part_binary;
+
 void radv_pipeline_cache_insert_shaders(struct radv_device *device,
                                         struct radv_pipeline_cache *cache,
                                         const unsigned char *sha1, struct radv_pipeline *pipeline,
                                         struct radv_shader_binary *const *binaries,
+                                        struct radv_shader_part_binary *ps_epilog_binary,
                                         const struct radv_ray_tracing_module *rt_groups,
                                         uint32_t num_rt_groups);
 
-- 
2.7.4
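
For reference, a minimal standalone C sketch of the blob layout the patch serializes into cache_entry::code[]: the per-stage shader binaries come first, then the optional PS epilog binary (ps_epilog_binary_size bytes), then the ray-tracing stack sizes. This is only an illustration of that ordering; the names below (toy_entry, toy_ps_epilog_data, TOY_NUM_STAGES) are invented, and the runtime pointer fields of the real cache_entry (shaders[], ps_epilog) are omitted.

/* Illustration only: mirrors the serialization order used by
 * radv_pipeline_cache_insert_shaders() after this patch; it is not
 * the actual RADV structure. */
#include <stdint.h>

#define TOY_NUM_STAGES 14 /* stand-in for MESA_VULKAN_SHADER_STAGES */

struct toy_entry {
   uint32_t binary_sizes[TOY_NUM_STAGES]; /* per-stage shader binary sizes */
   uint32_t ps_epilog_binary_size;        /* 0 when no PS epilog was cached */
   uint32_t num_stack_sizes;              /* ray-tracing stack sizes at the end */
   char code[];                           /* binaries laid out back to back */
};

/* Returns where the serialized PS epilog binary starts inside code[],
 * or NULL when the entry was written with ps_epilog_binary == NULL. */
static const char *
toy_ps_epilog_data(const struct toy_entry *entry)
{
   const char *p = entry->code;

   /* Shader binaries are written first, in stage order. */
   for (unsigned i = 0; i < TOY_NUM_STAGES; i++)
      p += entry->binary_sizes[i];

   /* The PS epilog binary, if any, follows them and precedes the
    * radv_pipeline_shader_stack_size array. */
   return entry->ps_epilog_binary_size ? p : NULL;
}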