radv/rt: separate shader compilation
author: Daniel Schürmann <daniel@schuermann.dev>
Fri, 10 Mar 2023 17:40:58 +0000 (18:40 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Thu, 8 Jun 2023 00:37:03 +0000 (00:37 +0000)
With this patch, we compile separately
 - general shaders (raygen, miss, callable)
 - closest-hit shaders
 - traversal shader (incl. all intersection / any-hit shaders)

Each shader uses the following scheme:

  if (shader_pc == shader_va) {
     <shader code>
  }
  next = select_next_shader(shader_va)
  jump next

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22096>

src/amd/vulkan/radv_pipeline_rt.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_rt_common.c
src/amd/vulkan/radv_rt_shader.c
src/amd/vulkan/radv_shader.h

index feb41fe..25a6b18 100644 (file)
@@ -27,6 +27,7 @@
 #include "radv_debug.h"
 #include "radv_private.h"
 #include "radv_shader.h"
+#include "vk_pipeline.h"
 
 struct rt_handle_hash_entry {
    uint32_t key;
@@ -362,34 +363,42 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
 }
 
 static VkResult
-radv_rt_precompile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
-                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
-                           const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
-                           const struct radv_pipeline_key *key,
-                           struct radv_ray_tracing_stage *stages)
+radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
+                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+                        const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
+                        const struct radv_pipeline_key *key,
+                        struct radv_ray_tracing_pipeline *pipeline)
 {
+   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
+      return VK_PIPELINE_COMPILE_REQUIRED;
+
+   struct radv_ray_tracing_stage *stages = pipeline->stages;
+
    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
       int64_t stage_start = os_time_get_nano();
       struct radv_pipeline_stage stage;
       radv_pipeline_stage_init(&pCreateInfo->pStages[idx], &stage, stages[idx].stage);
 
-      uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
-      radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
-
-      /* lookup the stage in cache */
-      stages[idx].shader = radv_pipeline_cache_search_nir(device, cache, shader_sha1);
-
       if (stages[idx].shader)
          goto feedback;
 
-      if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
-         return VK_PIPELINE_COMPILE_REQUIRED;
-
       /* precompile the shader */
-      struct nir_shader *nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key);
-      stages[idx].shader = radv_pipeline_cache_nir_to_handle(device, cache, nir, shader_sha1,
-                                                             !key->optimisations_disabled);
-      ralloc_free(nir);
+      stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key);
+
+      if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
+         uint32_t stack_size = 0;
+         struct radv_shader *shader =
+            radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size);
+         stages[idx].stack_size = stack_size;
+         stages[idx].shader = shader ? &shader->base : NULL;
+      } else {
+         uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
+         radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
+         stages[idx].stack_size = stage.nir->scratch_size;
+         stages[idx].shader = radv_pipeline_cache_nir_to_handle(
+            device, cache, stage.nir, shader_sha1, !key->optimisations_disabled);
+      }
+      ralloc_free(stage.nir);
 
       if (!stages[idx].shader)
          return VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -402,93 +411,27 @@ radv_rt_precompile_shaders(struct radv_device *device, struct vk_pipeline_cache
       }
    }
 
-   return VK_SUCCESS;
-}
-
-static VkResult
-radv_rt_pipeline_compile(struct radv_ray_tracing_pipeline *pipeline,
-                         struct radv_pipeline_layout *pipeline_layout, struct radv_device *device,
-                         struct vk_pipeline_cache *cache,
-                         const struct radv_pipeline_key *pipeline_key,
-                         const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
-                         const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
-{
-   struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
-   bool keep_executable_info = radv_pipeline_capture_shaders(device, pCreateInfo->flags);
-   bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pCreateInfo->flags);
-   struct radv_pipeline_stage rt_stage = {0};
+   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
+      return VK_SUCCESS;
 
-   /* First check if we can get things from the cache before we take the expensive step of
-    * generating the nir. */
-   struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
-   VkPipelineShaderStageCreateInfo stage = {
+   /* create traversal shader */
+   struct vk_shader_module traversal_module = {
+      .base.type = VK_OBJECT_TYPE_SHADER_MODULE,
+      .nir = radv_build_traversal_shader(device, pipeline, pCreateInfo, key),
+   };
+   const VkPipelineShaderStageCreateInfo pStage = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-      .pNext = NULL,
       .stage = VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
-      .module = vk_shader_module_to_handle(&module),
+      .module = vk_shader_module_to_handle(&traversal_module),
       .pName = "main",
    };
-
-   radv_pipeline_stage_init(&stage, &rt_stage, vk_to_mesa_shader_stage(stage.stage));
-
-   if (!keep_executable_info &&
-       radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo))
-      return VK_SUCCESS;
-
-   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
-      return VK_PIPELINE_COMPILE_REQUIRED;
-
-   VkResult result = radv_rt_precompile_shaders(device, cache, pCreateInfo, creation_feedback,
-                                                pipeline_key, pipeline->stages);
-   if (result != VK_SUCCESS)
-      return result;
-
-   rt_stage.internal_nir = create_rt_shader(device, pipeline, pCreateInfo, pipeline_key);
-
-   /* Compile SPIR-V shader to NIR. */
-   rt_stage.nir =
-      radv_shader_spirv_to_nir(device, &rt_stage, pipeline_key, pipeline->base.base.is_internal);
-
-   radv_optimize_nir(rt_stage.nir, pipeline_key->optimisations_disabled);
-
-   /* Gather info again, information such as outputs_read can be out-of-date. */
-   nir_shader_gather_info(rt_stage.nir, nir_shader_get_entrypoint(rt_stage.nir));
-
-   /* Run the shader info pass. */
-   radv_nir_shader_info_init(&rt_stage.info);
-   radv_nir_shader_info_pass(device, rt_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key,
-                             pipeline->base.base.type, false, &rt_stage.info);
-
-   radv_declare_shader_args(device, pipeline_key, &rt_stage.info, rt_stage.stage, MESA_SHADER_NONE,
-                            RADV_SHADER_TYPE_DEFAULT, &rt_stage.args);
-
-   rt_stage.info.user_sgprs_locs = rt_stage.args.user_sgprs_locs;
-   rt_stage.info.inline_push_constant_mask = rt_stage.args.ac.inline_push_const_mask;
-
-   /* Postprocess NIR. */
-   radv_postprocess_nir(device, pipeline_layout, pipeline_key, MESA_SHADER_NONE, &rt_stage);
-
-   if (radv_can_dump_shader(device, rt_stage.nir, false))
-      nir_print_shader(rt_stage.nir, stderr);
-
-   /* Compile NIR shader to AMD assembly. */
-   pipeline->base.base.shaders[rt_stage.stage] =
-      radv_shader_nir_to_asm(device, cache, &rt_stage, &rt_stage.nir, 1, pipeline_key,
-                             keep_executable_info, keep_statistic_info, &binaries[rt_stage.stage]);
-
-   if (!keep_executable_info) {
-      radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
-                                             pipeline->sha1);
-   }
-
-   free(binaries[rt_stage.stage]);
-   if (radv_can_dump_shader_stats(device, rt_stage.nir)) {
-      radv_dump_shader_stats(device, &pipeline->base.base,
-                             pipeline->base.base.shaders[rt_stage.stage], rt_stage.stage, stderr);
-   }
-
-   ralloc_free(rt_stage.internal_nir);
-   ralloc_free(rt_stage.nir);
+   struct radv_pipeline_stage traversal_stage = {
+      .stage = MESA_SHADER_INTERSECTION,
+      .nir = traversal_module.nir,
+   };
+   vk_pipeline_hash_shader_stage(&pStage, NULL, traversal_stage.shader_sha1);
+   pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] =
+      radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &traversal_stage, NULL);
 
    return VK_SUCCESS;
 }
@@ -601,11 +544,16 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
    pipeline->base.base.shaders[MESA_SHADER_COMPUTE] = radv_create_rt_prolog(device);
 
    /* create combined config */
-   combine_config(&pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config,
-                  &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
-
-   postprocess_rt_config(&pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config,
-                         device->physical_device->rad_info.gfx_level,
+   struct ac_shader_config *config = &pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config;
+   for (unsigned i = 0; i < pipeline->stage_count; i++) {
+      if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
+         struct radv_shader *shader =
+            container_of(pipeline->stages[i].shader, struct radv_shader, base);
+         combine_config(config, &shader->config);
+      }
+   }
+   combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
+   postprocess_rt_config(config, device->physical_device->rad_info.gfx_level,
                          device->physical_device->rt_wave_size);
 }
 
@@ -619,6 +567,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
    VkResult result;
    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pCreateInfo->flags);
+   bool keep_executable_info = radv_pipeline_capture_shaders(device, pCreateInfo->flags);
    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
    if (creation_feedback)
@@ -646,7 +595,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    radv_rt_fill_stage_info(pCreateInfo, stages);
    result = radv_rt_fill_group_info(device, pCreateInfo, stages, pipeline->groups);
    if (result != VK_SUCCESS)
-      goto done;
+      goto fail;
 
    struct radv_pipeline_key key =
       radv_generate_rt_pipeline_key(device, pipeline, pCreateInfo->flags);
@@ -655,26 +604,41 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                         radv_get_hash_flags(device, keep_statistic_info));
    pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline->sha1;
 
-   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
-      result = radv_rt_precompile_shaders(device, cache, pCreateInfo, creation_feedback, &key,
-                                          pipeline->stages);
-      goto done;
-   }
+   bool cache_hit = false;
+   if (!keep_executable_info)
+      cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo);
 
-   result = radv_rt_pipeline_compile(pipeline, pipeline_layout, device, cache, &key, pCreateInfo,
-                                     creation_feedback);
+   if (!cache_hit) {
+      result =
+         radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline);
 
-   if (result != VK_SUCCESS)
-      goto done;
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
 
-   compute_rt_stack_size(pCreateInfo, pipeline);
-   compile_rt_prolog(device, pipeline);
+   if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
+      compute_rt_stack_size(pCreateInfo, pipeline);
+      compile_rt_prolog(device, pipeline);
 
-   radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout);
+      radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout);
+      radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base.base, false);
+   }
 
-   radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base.base, false);
+   if (!cache_hit)
+      radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
+                                             pipeline->sha1);
+
+   /* write shader VAs into group handles */
+   for (unsigned i = 0; i < pipeline->group_count; i++) {
+      if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) {
+         struct radv_shader *shader =
+            container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader,
+                         struct radv_shader, base);
+         pipeline->groups[i].handle.recursive_shader_ptr = shader->va;
+      }
+   }
 
-done:
+fail:
    if (creation_feedback)
       creation_feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - pipeline_start;
 
index 3a4a2cc..a223ed9 100644 (file)
@@ -2350,7 +2350,8 @@ struct radv_ray_tracing_stage {
 static inline bool
 radv_ray_tracing_stage_is_compiled(struct radv_ray_tracing_stage *stage)
 {
-   return false;
+   return stage->stage == MESA_SHADER_RAYGEN || stage->stage == MESA_SHADER_CALLABLE ||
+          stage->stage == MESA_SHADER_CLOSEST_HIT || stage->stage == MESA_SHADER_MISS;
 }
 
 struct radv_ray_tracing_pipeline {
index 60d58da..6478350 100644 (file)
@@ -39,7 +39,6 @@ radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
       return false;
 
    if (rt_pipelines) {
-      return false;
       if (pdevice->use_llvm)
          return false;
 
index b7bc312..3d67f91 100644 (file)
@@ -1432,7 +1432,7 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave
    return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
 }
 
-static nir_shader *
+nir_shader *
 radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
                             const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                             const struct radv_pipeline_key *key)
index f3ced77..28ea75c 100644 (file)
@@ -790,4 +790,8 @@ nir_shader *create_rt_shader(struct radv_device *device, struct radv_ray_tracing
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const struct radv_pipeline_key *key);
 
+nir_shader *radv_build_traversal_shader(struct radv_device *device,
+                                        struct radv_ray_tracing_pipeline *pipeline,
+                                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+                                        const struct radv_pipeline_key *key);
 #endif