radv/rt: create separate radv_ray_tracing_pipeline struct
author    Daniel Schürmann <daniel@schuermann.dev>  Thu, 22 Sep 2022 13:26:01 +0000 (15:26 +0200)
committer Marge Bot <emma+marge@anholt.net>         Mon, 26 Sep 2022 13:03:44 +0000 (13:03 +0000)
The new struct is inherited from radv_compute_pipeline and contains all
RT-related information. This will make it easier to transition to RT
shader functions.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18755>
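
The layering here is plain C struct embedding: each derived pipeline type
holds its base as the first member, so a pointer to the derived struct can
be reinterpreted as a pointer to its base. A minimal sketch of the hierarchy
this patch establishes (only the members relevant to the patch; the real
structs carry more state):

   struct radv_pipeline {                    /* common base */
      enum radv_pipeline_type type;
      /* ... */
   };

   struct radv_compute_pipeline {
      struct radv_pipeline base;
      bool cs_regalloc_hang_bug;
   };

   struct radv_ray_tracing_pipeline {
      struct radv_compute_pipeline base;

      struct radv_pipeline_group_handle *group_handles;
      struct radv_pipeline_shader_stack_size *stack_sizes;
      uint32_t group_count;
      bool dynamic_stack_size;
   };

This is why radv_set_rt_stack_size in the diff below now reads
rt_pipeline->base.base.scratch_bytes_per_wave: the first base reaches the
embedded compute pipeline, the second reaches the common radv_pipeline.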

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_pipeline_rt.c
src/amd/vulkan/radv_private.h

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c99c758..bbd28d9 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5133,15 +5133,15 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
       break;
    }
    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
-      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
+      struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
 
-      if (cmd_buffer->state.rt_pipeline == compute_pipeline)
+      if (cmd_buffer->state.rt_pipeline == rt_pipeline)
          return;
       radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
 
-      cmd_buffer->state.rt_pipeline = compute_pipeline;
+      cmd_buffer->state.rt_pipeline = rt_pipeline;
       cmd_buffer->push_constant_stages |= RADV_RT_STAGE_BITS;
-      if (compute_pipeline->dynamic_stack_size)
+      if (rt_pipeline->dynamic_stack_size)
          radv_set_rt_stack_size(cmd_buffer, cmd_buffer->state.rt_stack_size);
       break;
    }
@@ -8522,7 +8522,7 @@ static void
 radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables,
                 uint64_t indirect_va, enum radv_rt_mode mode)
 {
-   struct radv_compute_pipeline *pipeline = cmd_buffer->state.rt_pipeline;
+   struct radv_compute_pipeline *pipeline = &cmd_buffer->state.rt_pipeline->base;
    uint32_t base_reg = pipeline->base.user_data_0[MESA_SHADER_COMPUTE];
 
    struct radv_dispatch_info info = {0};
@@ -8659,8 +8659,9 @@ radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size)
    unsigned scratch_bytes_per_wave = 0;
 
    if (cmd_buffer->state.rt_pipeline) {
-      scratch_bytes_per_wave = cmd_buffer->state.rt_pipeline->base.scratch_bytes_per_wave;
-      wave_size = cmd_buffer->state.rt_pipeline->base.shaders[MESA_SHADER_COMPUTE]->info.wave_size;
+      scratch_bytes_per_wave = cmd_buffer->state.rt_pipeline->base.base.scratch_bytes_per_wave;
+      wave_size =
+         cmd_buffer->state.rt_pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->info.wave_size;
    }
 
    /* The hardware register is specified as a multiple of 256 DWORDS. */
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index edde3eb..8a11fb4 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -201,11 +201,11 @@ radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline
          radv_shader_part_unref(device, graphics_pipeline->ps_epilog);
 
       vk_free(&device->vk.alloc, graphics_pipeline->state_data);
-   } else if (pipeline->type == RADV_PIPELINE_COMPUTE) {
-      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
+   } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
+      struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
 
-      free(compute_pipeline->rt_group_handles);
-      free(compute_pipeline->rt_stack_sizes);
+      free(rt_pipeline->group_handles);
+      free(rt_pipeline->stack_sizes);
    } else if (pipeline->type == RADV_PIPELINE_LIBRARY) {
       struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline);
 
@@ -6523,12 +6523,34 @@ radv_generate_compute_pipeline_key(struct radv_compute_pipeline *pipeline,
    return key;
 }
 
+static void
+radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline,
+                           const struct radv_pipeline_layout *layout)
+{
+   const struct radv_device *device = pipeline->base.device;
+
+   pipeline->base.user_data_0[MESA_SHADER_COMPUTE] = R_00B900_COMPUTE_USER_DATA_0;
+   pipeline->base.need_indirect_descriptor_sets |=
+      radv_shader_need_indirect_descriptor_sets(&pipeline->base, MESA_SHADER_COMPUTE);
+   radv_pipeline_init_scratch(device, &pipeline->base);
+
+   pipeline->base.push_constant_size = layout->push_constant_size;
+   pipeline->base.dynamic_offset_count = layout->dynamic_offset_count;
+
+   if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) {
+      struct radv_shader *compute_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
+      unsigned *cs_block_size = compute_shader->info.cs.block_size;
+
+      pipeline->cs_regalloc_hang_bug = cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256;
+   }
+
+   radv_compute_generate_pm4(pipeline);
+}
+
 VkResult
 radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                              const VkComputePipelineCreateInfo *pCreateInfo,
-                             const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash,
-                             struct radv_pipeline_shader_stack_size *rt_stack_sizes,
-                             uint32_t rt_group_count, VkPipeline *pPipeline)
+                             const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
@@ -6539,46 +6561,26 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (pipeline == NULL) {
-      free(rt_stack_sizes);
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
    }
 
    radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_COMPUTE);
 
-   pipeline->rt_stack_sizes = rt_stack_sizes;
-   pipeline->group_count = rt_group_count;
-
    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
 
    struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
 
    UNUSED gl_shader_stage last_vgt_api_stage = MESA_SHADER_NONE;
-   result = radv_create_shaders(&pipeline->base, pipeline_layout, device, cache, &key, &pCreateInfo->stage,
-                                1, pCreateInfo->flags, custom_hash, creation_feedback,
-                                &pipeline->rt_stack_sizes, &pipeline->group_count,
-                                &last_vgt_api_stage);
+   result = radv_create_shaders(&pipeline->base, pipeline_layout, device, cache, &key,
+                                &pCreateInfo->stage, 1, pCreateInfo->flags, NULL, creation_feedback,
+                                NULL, NULL, &last_vgt_api_stage);
    if (result != VK_SUCCESS) {
       radv_pipeline_destroy(device, &pipeline->base, pAllocator);
       return result;
    }
 
-   pipeline->base.user_data_0[MESA_SHADER_COMPUTE] = R_00B900_COMPUTE_USER_DATA_0;
-   pipeline->base.need_indirect_descriptor_sets |=
-      radv_shader_need_indirect_descriptor_sets(&pipeline->base, MESA_SHADER_COMPUTE);
-   radv_pipeline_init_scratch(device, &pipeline->base);
-
-   pipeline->base.push_constant_size = pipeline_layout->push_constant_size;
-   pipeline->base.dynamic_offset_count = pipeline_layout->dynamic_offset_count;
-
-   if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) {
-      struct radv_shader *compute_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
-      unsigned *cs_block_size = compute_shader->info.cs.block_size;
-
-      pipeline->cs_regalloc_hang_bug = cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256;
-   }
-
-   radv_compute_generate_pm4(pipeline);
+   radv_compute_pipeline_init(pipeline, pipeline_layout);
 
    *pPipeline = radv_pipeline_to_handle(&pipeline->base);
 
@@ -6595,8 +6597,8 @@ radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uin
    unsigned i = 0;
    for (; i < count; i++) {
       VkResult r;
-      r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, NULL,
-                                       NULL, 0, &pPipelines[i]);
+      r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
+                                       &pPipelines[i]);
       if (r != VK_SUCCESS) {
          result = r;
          pPipelines[i] = VK_NULL_HANDLE;
@@ -6612,6 +6614,53 @@ radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uin
    return result;
 }
 
+VkResult
+radv_rt_pipeline_create_(VkDevice _device, VkPipelineCache _cache,
+                         const VkComputePipelineCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash,
+                         struct radv_pipeline_shader_stack_size *rt_stack_sizes,
+                         uint32_t rt_group_count, VkPipeline *pPipeline)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+   RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
+   struct radv_ray_tracing_pipeline *pipeline;
+   VkResult result;
+
+   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (pipeline == NULL) {
+      free(rt_stack_sizes);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_RAY_TRACING);
+
+   pipeline->stack_sizes = rt_stack_sizes;
+   pipeline->group_count = rt_group_count;
+
+   const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
+      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
+
+   struct radv_pipeline_key key = radv_generate_compute_pipeline_key(&pipeline->base, pCreateInfo);
+
+   UNUSED gl_shader_stage last_vgt_api_stage = MESA_SHADER_NONE;
+   result = radv_create_shaders(&pipeline->base.base, pipeline_layout, device, cache, &key,
+                                &pCreateInfo->stage, 1, pCreateInfo->flags, custom_hash,
+                                creation_feedback, &pipeline->stack_sizes, &pipeline->group_count,
+                                &last_vgt_api_stage);
+   if (result != VK_SUCCESS) {
+      radv_pipeline_destroy(device, &pipeline->base.base, pAllocator);
+      return result;
+   }
+
+   radv_compute_pipeline_init(&pipeline->base, pipeline_layout);
+
+   *pPipeline = radv_pipeline_to_handle(&pipeline->base.base);
+
+   return VK_SUCCESS;
+}
+
 static uint32_t
 radv_get_executable_count(struct radv_pipeline *pipeline)
 {
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index fa08bc6..f2b7bcc 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -1713,7 +1713,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    RADV_FROM_HANDLE(radv_device, device, _device);
    VkResult result;
    struct radv_pipeline *pipeline = NULL;
-   struct radv_compute_pipeline *compute_pipeline = NULL;
+   struct radv_ray_tracing_pipeline *rt_pipeline = NULL;
    struct radv_pipeline_shader_stack_size *stack_sizes = NULL;
    uint8_t hash[20];
    nir_shader *shader = NULL;
@@ -1757,8 +1757,8 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
 
    /* First check if we can get things from the cache before we take the expensive step of
     * generating the nir. */
-   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
-                                         stack_sizes, local_create_info.groupCount, pPipeline);
+   result = radv_rt_pipeline_create_(_device, _cache, &compute_info, pAllocator, hash, stack_sizes,
+                                     local_create_info.groupCount, pPipeline);
 
    if (result == VK_PIPELINE_COMPILE_REQUIRED) {
       if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
@@ -1773,24 +1773,24 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
       shader = create_rt_shader(device, &local_create_info, stack_sizes);
       module.nir = shader;
       compute_info.flags = pCreateInfo->flags;
-      result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
-                                            stack_sizes, local_create_info.groupCount, pPipeline);
+      result = radv_rt_pipeline_create_(_device, _cache, &compute_info, pAllocator, hash,
+                                        stack_sizes, local_create_info.groupCount, pPipeline);
       stack_sizes = NULL;
 
       if (result != VK_SUCCESS)
          goto shader_fail;
    }
    pipeline = radv_pipeline_from_handle(*pPipeline);
-   compute_pipeline = radv_pipeline_to_compute(pipeline);
+   rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
 
-   compute_pipeline->rt_group_handles =
-      calloc(sizeof(*compute_pipeline->rt_group_handles), local_create_info.groupCount);
-   if (!compute_pipeline->rt_group_handles) {
+   rt_pipeline->group_handles =
+      calloc(sizeof(*rt_pipeline->group_handles), local_create_info.groupCount);
+   if (!rt_pipeline->group_handles) {
       result = VK_ERROR_OUT_OF_HOST_MEMORY;
       goto shader_fail;
    }
 
-   compute_pipeline->dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);
+   rt_pipeline->dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);
 
    /* For General and ClosestHit shaders, we can use the shader ID directly as handle.
     * As (potentially different) AnyHit shaders are inlined, for Intersection shaders
@@ -1801,17 +1801,17 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
       switch (group_info->type) {
       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
          if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
-            compute_pipeline->rt_group_handles[i].handles[0] = group_info->generalShader + 2;
+            rt_pipeline->group_handles[i].handles[0] = group_info->generalShader + 2;
          break;
       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
          if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR)
-            compute_pipeline->rt_group_handles[i].handles[1] = i + 2;
+            rt_pipeline->group_handles[i].handles[1] = i + 2;
          FALLTHROUGH;
       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
          if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
-            compute_pipeline->rt_group_handles[i].handles[0] = group_info->closestHitShader + 2;
+            rt_pipeline->group_handles[i].handles[0] = group_info->closestHitShader + 2;
          if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
-            compute_pipeline->rt_group_handles[i].handles[1] = i + 2;
+            rt_pipeline->group_handles[i].handles[1] = i + 2;
          break;
       case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
          unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
@@ -1862,16 +1862,16 @@ radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, u
                                         uint32_t groupCount, size_t dataSize, void *pData)
 {
    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
-   struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
+   struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
    char *data = pData;
 
-   STATIC_ASSERT(sizeof(*compute_pipeline->rt_group_handles) <= RADV_RT_HANDLE_SIZE);
+   STATIC_ASSERT(sizeof(*rt_pipeline->group_handles) <= RADV_RT_HANDLE_SIZE);
 
    memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);
 
    for (uint32_t i = 0; i < groupCount; ++i) {
-      memcpy(data + i * RADV_RT_HANDLE_SIZE, &compute_pipeline->rt_group_handles[firstGroup + i],
-             sizeof(*compute_pipeline->rt_group_handles));
+      memcpy(data + i * RADV_RT_HANDLE_SIZE, &rt_pipeline->group_handles[firstGroup + i],
+             sizeof(*rt_pipeline->group_handles));
    }
 
    return VK_SUCCESS;
@@ -1882,9 +1882,8 @@ radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline,
                                           VkShaderGroupShaderKHR groupShader)
 {
    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
-   struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
-   const struct radv_pipeline_shader_stack_size *stack_size =
-      &compute_pipeline->rt_stack_sizes[group];
+   struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
+   const struct radv_pipeline_shader_stack_size *stack_size = &rt_pipeline->stack_sizes[group];
 
    if (groupShader == VK_SHADER_GROUP_SHADER_ANY_HIT_KHR ||
        groupShader == VK_SHADER_GROUP_SHADER_INTERSECTION_KHR)
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 9267249..81b3383 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1465,7 +1465,7 @@ struct radv_cmd_state {
    struct radv_graphics_pipeline *emitted_graphics_pipeline;
    struct radv_compute_pipeline *compute_pipeline;
    struct radv_compute_pipeline *emitted_compute_pipeline;
-   struct radv_compute_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
+   struct radv_ray_tracing_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
    struct radv_dynamic_state dynamic;
    struct radv_vs_input_state dynamic_vs_input;
    struct radv_streamout_state streamout;
@@ -1924,10 +1924,12 @@ struct radv_binning_state {
 enum radv_pipeline_type {
    RADV_PIPELINE_GRAPHICS,
    RADV_PIPELINE_GRAPHICS_LIB,
-   /* Compute pipeline (incl raytracing pipeline) */
+   /* Compute pipeline */
    RADV_PIPELINE_COMPUTE,
    /* Pipeline library. This can't actually run and merely is a partial pipeline. */
-   RADV_PIPELINE_LIBRARY
+   RADV_PIPELINE_LIBRARY,
+   /* Raytracing pipeline */
+   RADV_PIPELINE_RAY_TRACING,
 };
 
 struct radv_pipeline_group_handle {
@@ -2063,12 +2065,6 @@ struct radv_compute_pipeline {
    struct radv_pipeline base;
 
    bool cs_regalloc_hang_bug;
-
-   /* Raytracing */
-   struct radv_pipeline_group_handle *rt_group_handles;
-   struct radv_pipeline_shader_stack_size *rt_stack_sizes;
-   bool dynamic_stack_size;
-   uint32_t group_count;
 };
 
 struct radv_library_pipeline {
@@ -2094,6 +2090,15 @@ struct radv_graphics_lib_pipeline {
    VkGraphicsPipelineLibraryFlagsEXT lib_flags;
 };
 
+struct radv_ray_tracing_pipeline {
+   struct radv_compute_pipeline base;
+
+   struct radv_pipeline_group_handle *group_handles;
+   struct radv_pipeline_shader_stack_size *stack_sizes;
+   uint32_t group_count;
+   bool dynamic_stack_size;
+};
+
 #define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)            \
    static inline struct radv_##pipe_type##_pipeline *                \
    radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline)      \
@@ -2106,6 +2111,7 @@ RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
 RADV_DECL_PIPELINE_DOWNCAST(graphics_lib, RADV_PIPELINE_GRAPHICS_LIB)
 RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)
 RADV_DECL_PIPELINE_DOWNCAST(library, RADV_PIPELINE_LIBRARY)
+RADV_DECL_PIPELINE_DOWNCAST(ray_tracing, RADV_PIPELINE_RAY_TRACING)
 
 struct radv_pipeline_stage {
    gl_shader_stage stage;
@@ -2173,9 +2179,14 @@ VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
 VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                                       const VkComputePipelineCreateInfo *pCreateInfo,
                                       const VkAllocationCallbacks *pAllocator,
-                                      const uint8_t *custom_hash,
-                                      struct radv_pipeline_shader_stack_size *rt_stack_sizes,
-                                      uint32_t rt_group_count, VkPipeline *pPipeline);
+                                      VkPipeline *pPipeline);
+
+VkResult radv_rt_pipeline_create_(VkDevice _device, VkPipelineCache _cache,
+                                  const VkComputePipelineCreateInfo *pCreateInfo,
+                                  const VkAllocationCallbacks *pAllocator,
+                                  const uint8_t *custom_hash,
+                                  struct radv_pipeline_shader_stack_size *rt_stack_sizes,
+                                  uint32_t rt_group_count, VkPipeline *pPipeline);
 
 void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                            const VkAllocationCallbacks *allocator);
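
The RADV_DECL_PIPELINE_DOWNCAST(ray_tracing, RADV_PIPELINE_RAY_TRACING)
invocation added above generates the radv_pipeline_to_ray_tracing() helper
used throughout this patch. A sketch of the expansion, assuming the macro
body (elided in the hunk) follows the usual assert-and-cast pattern of the
existing downcasts:

   static inline struct radv_ray_tracing_pipeline *
   radv_pipeline_to_ray_tracing(struct radv_pipeline *pipeline)
   {
      assert(pipeline->type == RADV_PIPELINE_RAY_TRACING);
      return (struct radv_ray_tracing_pipeline *)pipeline;
   }

Because radv_ray_tracing_pipeline embeds radv_compute_pipeline as its first
member, the cast is valid, and callers such as radv_CmdBindPipeline and
radv_GetRayTracingShaderGroupHandlesKHR can recover the RT-specific state
from a generic radv_pipeline pointer.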