From: Daniel Schürmann
Date: Fri, 10 Mar 2023 17:40:58 +0000 (+0100)
Subject: radv/rt: separate shader compilation
X-Git-Tag: upstream/23.3.3~7370
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=163c97e6a7adf40a9bebeaf2fc338faaa387702a;p=platform%2Fupstream%2Fmesa.git

radv/rt: separate shader compilation

With this patch, we compile separately:
- general shaders (raygen, miss, callable)
- closest-hit shaders
- traversal shader (incl. all intersection / any-hit shaders)

Each shader uses the following scheme:

   if (shader_pc == shader_va) {
   }
   next = select_next_shader(shader_va)
   jump next
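For illustration only (not part of this patch): a minimal, self-contained C model of the
dispatch scheme above. The names (shader_va_t, RAYGEN_VA, MISS_VA, select_next_shader,
run_pipeline) are hypothetical stand-ins; on the GPU the PC/VA comparison and the tail
jump are emitted into each separately compiled shader binary rather than driven by a
host-side loop.

   #include <stdint.h>
   #include <stdio.h>

   /* Hypothetical model: every separately compiled shader is identified by a
    * virtual address (VA); the current "program counter" selects which shader
    * body executes next. */
   typedef uint64_t shader_va_t;

   enum { DONE_VA = 0, RAYGEN_VA = 0x1000, MISS_VA = 0x2000 };

   static shader_va_t select_next_shader(shader_va_t current)
   {
      /* Stand-in for the real selection logic (SBT lookup, traversal result, ...). */
      return current == RAYGEN_VA ? MISS_VA : DONE_VA;
   }

   static void run_pipeline(shader_va_t shader_pc)
   {
      while (shader_pc != DONE_VA) {
         if (shader_pc == RAYGEN_VA) {
            /* raygen shader body */
            printf("raygen\n");
         }
         if (shader_pc == MISS_VA) {
            /* miss shader body */
            printf("miss\n");
         }
         shader_pc = select_next_shader(shader_pc); /* "jump next" */
      }
   }

   int main(void)
   {
      run_pipeline(RAYGEN_VA);
      return 0;
   }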
Part-of:
---

diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index feb41fe..25a6b18 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -27,6 +27,7 @@
 #include "radv_debug.h"
 #include "radv_private.h"
 #include "radv_shader.h"
+#include "vk_pipeline.h"

 struct rt_handle_hash_entry {
    uint32_t key;
@@ -362,34 +363,42 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
 }

 static VkResult
-radv_rt_precompile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
-                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
-                           const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
-                           const struct radv_pipeline_key *key,
-                           struct radv_ray_tracing_stage *stages)
+radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
+                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+                        const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
+                        const struct radv_pipeline_key *key,
+                        struct radv_ray_tracing_pipeline *pipeline)
 {
+   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
+      return VK_PIPELINE_COMPILE_REQUIRED;
+
+   struct radv_ray_tracing_stage *stages = pipeline->stages;
+
    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
       int64_t stage_start = os_time_get_nano();

       struct radv_pipeline_stage stage;
       radv_pipeline_stage_init(&pCreateInfo->pStages[idx], &stage, stages[idx].stage);

-      uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
-      radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
-
-      /* lookup the stage in cache */
-      stages[idx].shader = radv_pipeline_cache_search_nir(device, cache, shader_sha1);
-      if (stages[idx].shader)
-         goto feedback;
-
-      if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
-         return VK_PIPELINE_COMPILE_REQUIRED;
-
-      /* precompile the shader */
-      struct nir_shader *nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key);
-      stages[idx].shader = radv_pipeline_cache_nir_to_handle(device, cache, nir, shader_sha1,
-                                                             !key->optimisations_disabled);
-      ralloc_free(nir);
+      stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key);
+
+      if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
+         uint32_t stack_size = 0;
+         struct radv_shader *shader =
+            radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size);
+         stages[idx].stack_size = stack_size;
+         stages[idx].shader = shader ? &shader->base : NULL;
+      } else {
+         uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
+         radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
+         stages[idx].stack_size = stage.nir->scratch_size;
+         stages[idx].shader = radv_pipeline_cache_nir_to_handle(
+            device, cache, stage.nir, shader_sha1, !key->optimisations_disabled);
+      }
+      ralloc_free(stage.nir);

       if (!stages[idx].shader)
          return VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -402,93 +411,27 @@ radv_rt_precompile_shaders(struct radv_device *device, struct vk_pipeline_cache
       }
    }

-   return VK_SUCCESS;
-}
-
-static VkResult
-radv_rt_pipeline_compile(struct radv_ray_tracing_pipeline *pipeline,
-                         struct radv_pipeline_layout *pipeline_layout, struct radv_device *device,
-                         struct vk_pipeline_cache *cache,
-                         const struct radv_pipeline_key *pipeline_key,
-                         const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
-                         const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
-{
-   struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
-   bool keep_executable_info = radv_pipeline_capture_shaders(device, pCreateInfo->flags);
-   bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pCreateInfo->flags);
-   struct radv_pipeline_stage rt_stage = {0};
+   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
+      return VK_SUCCESS;

-   /* First check if we can get things from the cache before we take the expensive step of
-    * generating the nir. */
-   struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
-   VkPipelineShaderStageCreateInfo stage = {
+   /* create traversal shader */
+   struct vk_shader_module traversal_module = {
+      .base.type = VK_OBJECT_TYPE_SHADER_MODULE,
+      .nir = radv_build_traversal_shader(device, pipeline, pCreateInfo, key),
+   };
+   const VkPipelineShaderStageCreateInfo pStage = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-      .pNext = NULL,
       .stage = VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
-      .module = vk_shader_module_to_handle(&module),
+      .module = vk_shader_module_to_handle(&traversal_module),
       .pName = "main",
    };
-
-   radv_pipeline_stage_init(&stage, &rt_stage, vk_to_mesa_shader_stage(stage.stage));
-
-   if (!keep_executable_info &&
-       radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo))
-      return VK_SUCCESS;
-
-   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
-      return VK_PIPELINE_COMPILE_REQUIRED;
-
-   VkResult result = radv_rt_precompile_shaders(device, cache, pCreateInfo, creation_feedback,
-                                                pipeline_key, pipeline->stages);
-   if (result != VK_SUCCESS)
-      return result;
-
-   rt_stage.internal_nir = create_rt_shader(device, pipeline, pCreateInfo, pipeline_key);
-
-   /* Compile SPIR-V shader to NIR. */
-   rt_stage.nir =
-      radv_shader_spirv_to_nir(device, &rt_stage, pipeline_key, pipeline->base.base.is_internal);
-
-   radv_optimize_nir(rt_stage.nir, pipeline_key->optimisations_disabled);
-
-   /* Gather info again, information such as outputs_read can be out-of-date. */
-   nir_shader_gather_info(rt_stage.nir, nir_shader_get_entrypoint(rt_stage.nir));
-
-   /* Run the shader info pass. */
-   radv_nir_shader_info_init(&rt_stage.info);
-   radv_nir_shader_info_pass(device, rt_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key,
-                             pipeline->base.base.type, false, &rt_stage.info);
-
-   radv_declare_shader_args(device, pipeline_key, &rt_stage.info, rt_stage.stage, MESA_SHADER_NONE,
-                            RADV_SHADER_TYPE_DEFAULT, &rt_stage.args);
-
-   rt_stage.info.user_sgprs_locs = rt_stage.args.user_sgprs_locs;
-   rt_stage.info.inline_push_constant_mask = rt_stage.args.ac.inline_push_const_mask;
-
-   /* Postprocess NIR. */
-   radv_postprocess_nir(device, pipeline_layout, pipeline_key, MESA_SHADER_NONE, &rt_stage);
-
-   if (radv_can_dump_shader(device, rt_stage.nir, false))
-      nir_print_shader(rt_stage.nir, stderr);
-
-   /* Compile NIR shader to AMD assembly. */
-   pipeline->base.base.shaders[rt_stage.stage] =
-      radv_shader_nir_to_asm(device, cache, &rt_stage, &rt_stage.nir, 1, pipeline_key,
-                             keep_executable_info, keep_statistic_info, &binaries[rt_stage.stage]);
-
-   if (!keep_executable_info) {
-      radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
-                                             pipeline->sha1);
-   }
-
-   free(binaries[rt_stage.stage]);
-   if (radv_can_dump_shader_stats(device, rt_stage.nir)) {
-      radv_dump_shader_stats(device, &pipeline->base.base,
-                             pipeline->base.base.shaders[rt_stage.stage], rt_stage.stage, stderr);
-   }
-
-   ralloc_free(rt_stage.internal_nir);
-   ralloc_free(rt_stage.nir);
+   struct radv_pipeline_stage traversal_stage = {
+      .stage = MESA_SHADER_INTERSECTION,
+      .nir = traversal_module.nir,
+   };
+   vk_pipeline_hash_shader_stage(&pStage, NULL, traversal_stage.shader_sha1);
+   pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] =
+      radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &traversal_stage, NULL);

    return VK_SUCCESS;
 }
@@ -601,11 +544,16 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
    pipeline->base.base.shaders[MESA_SHADER_COMPUTE] = radv_create_rt_prolog(device);

    /* create combined config */
-   combine_config(&pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config,
-                  &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
-
-   postprocess_rt_config(&pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config,
-                         device->physical_device->rad_info.gfx_level,
+   struct ac_shader_config *config = &pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config;
+   for (unsigned i = 0; i < pipeline->stage_count; i++) {
+      if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
+         struct radv_shader *shader =
+            container_of(pipeline->stages[i].shader, struct radv_shader, base);
+         combine_config(config, &shader->config);
+      }
+   }
+   combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
+   postprocess_rt_config(config, device->physical_device->rad_info.gfx_level,
                          device->physical_device->rt_wave_size);
 }

@@ -619,6 +567,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
    VkResult result;
    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pCreateInfo->flags);
+   bool keep_executable_info = radv_pipeline_capture_shaders(device, pCreateInfo->flags);
    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
    if (creation_feedback)
@@ -646,7 +595,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    radv_rt_fill_stage_info(pCreateInfo, stages);

    result = radv_rt_fill_group_info(device, pCreateInfo, stages, pipeline->groups);
    if (result != VK_SUCCESS)
-      goto done;
+      goto fail;

    struct radv_pipeline_key key =
       radv_generate_rt_pipeline_key(device, pipeline, pCreateInfo->flags);

@@ -655,26 +604,41 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                        radv_get_hash_flags(device, keep_statistic_info));
    pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline->sha1;

-   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
-      result = radv_rt_precompile_shaders(device, cache, pCreateInfo, creation_feedback, &key,
-                                          pipeline->stages);
-      goto done;
-   }
+   bool cache_hit = false;
+   if (!keep_executable_info)
+      cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo);

-   result = radv_rt_pipeline_compile(pipeline, pipeline_layout, device, cache, &key, pCreateInfo,
-                                     creation_feedback);
+   if (!cache_hit) {
+      result =
+         radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline);

-   if (result != VK_SUCCESS)
-      goto done;
+      if (result != VK_SUCCESS)
+         goto fail;
+   }

-   compute_rt_stack_size(pCreateInfo, pipeline);
-   compile_rt_prolog(device, pipeline);
+   if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
+      compute_rt_stack_size(pCreateInfo, pipeline);
+      compile_rt_prolog(device, pipeline);

-   radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout);
+      radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout);
+      radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base.base, false);
+   }

-   radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base.base, false);
+   if (!cache_hit)
+      radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
+                                             pipeline->sha1);
+
+   /* write shader VAs into group handles */
+   for (unsigned i = 0; i < pipeline->group_count; i++) {
+      if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) {
+         struct radv_shader *shader =
+            container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader,
+                         struct radv_shader, base);
+         pipeline->groups[i].handle.recursive_shader_ptr = shader->va;
+      }
+   }

-done:
+fail:
    if (creation_feedback)
       creation_feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - pipeline_start;

diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3a4a2cc..a223ed9 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2350,7 +2350,8 @@ struct radv_ray_tracing_stage {
 static inline bool
 radv_ray_tracing_stage_is_compiled(struct radv_ray_tracing_stage *stage)
 {
-   return false;
+   return stage->stage == MESA_SHADER_RAYGEN || stage->stage == MESA_SHADER_CALLABLE ||
+          stage->stage == MESA_SHADER_CLOSEST_HIT || stage->stage == MESA_SHADER_MISS;
 }

 struct radv_ray_tracing_pipeline {
diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c
index 60d58da..6478350 100644
--- a/src/amd/vulkan/radv_rt_common.c
+++ b/src/amd/vulkan/radv_rt_common.c
@@ -39,7 +39,6 @@ radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
       return false;

    if (rt_pipelines) {
-      return false;
       if (pdevice->use_llvm)
          return false;

diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
index b7bc312..3d67f91 100644
--- a/src/amd/vulkan/radv_rt_shader.c
+++ b/src/amd/vulkan/radv_rt_shader.c
@@ -1432,7 +1432,7 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave
    return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
 }

-static nir_shader *
+nir_shader *
 radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
                             const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                             const struct radv_pipeline_key *key)
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index f3ced77..28ea75c 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -790,4 +790,8 @@ nir_shader *create_rt_shader(struct radv_device *device, struct radv_ray_tracing
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const struct radv_pipeline_key *key);

+nir_shader *radv_build_traversal_shader(struct radv_device *device,
+                                        struct radv_ray_tracing_pipeline *pipeline,
+                                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+                                        const struct radv_pipeline_key *key);
 #endif