radv/sqtt: Handle separately-compiled RT pipelines
authorFriedrich Vock <friedrich.vock@gmx.de>
Fri, 28 Jul 2023 15:45:24 +0000 (17:45 +0200)
committerMarge Bot <emma+marge@anholt.net>
Tue, 22 Aug 2023 11:33:11 +0000 (11:33 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24371>

src/amd/vulkan/layers/radv_sqtt_layer.c

index cfcd49f26c5d588f9e0bb0341c9dc39ef0d07c7a..e372c7139852ca9d74c4aa1ec515f095fded556d 100644 (file)
@@ -1110,6 +1110,7 @@ radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data
                                ? 1024
                                : pdevice->rad_info.lds_encode_granularity;
 
+   memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
    shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
    shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
    shader_data->code_size = shader->code_size;
@@ -1162,6 +1163,128 @@ radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
    return VK_SUCCESS;
 }
 
+static VkResult
+radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
+                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
+                   uint32_t index, uint64_t hash)
+{
+   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
+   if (!record)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];
+
+   record->shader_stages_mask = 0;
+   record->num_shaders_combined = 0;
+   record->pipeline_hash[0] = hash;
+   record->pipeline_hash[1] = hash;
+
+   radv_fill_code_object_record(device, shader_data, shader, shader->va);
+   shader_data->rt_stack_size = stack_size;
+
+   record->shader_stages_mask |= (1 << shader->info.stage);
+   record->is_rt = true;
+   switch (shader->info.stage) {
+   case MESA_SHADER_RAYGEN:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index);
+      break;
+   case MESA_SHADER_CLOSEST_HIT:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index);
+      break;
+   case MESA_SHADER_MISS:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index);
+      break;
+   case MESA_SHADER_INTERSECTION:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
+      break;
+   case MESA_SHADER_CALLABLE:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index);
+      break;
+   case MESA_SHADER_COMPUTE:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
+      break;
+   default:
+      unreachable("invalid rt stage");
+   }
+   record->num_shaders_combined = 1;
+
+   simple_mtx_lock(&code_object->lock);
+   list_addtail(&record->list, &code_object->record);
+   code_object->record_count++;
+   simple_mtx_unlock(&code_object->lock);
+
+   return VK_SUCCESS;
+}
+
+static void
+compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
+{
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
+   _mesa_sha1_update(&ctx, &index, sizeof(index));
+   _mesa_sha1_final(&ctx, sha1);
+}
+
+static VkResult
+radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
+                       uint32_t stack_size, struct radv_shader *shader)
+{
+   unsigned char sha1[SHA1_DIGEST_LENGTH];
+   VkResult result;
+
+   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);
+
+   result = ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash);
+   if (!result)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   result = ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va);
+   if (!result)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   result =
+      radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index, *(uint64_t *)sha1);
+   return result;
+}
+
+static VkResult
+radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
+{
+   VkResult result = VK_SUCCESS;
+
+   uint32_t max_any_hit_stack_size = 0;
+   uint32_t max_intersection_stack_size = 0;
+
+   for (unsigned i = 0; i < pipeline->stage_count; i++) {
+      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
+      if (!radv_ray_tracing_stage_is_compiled(stage)) {
+         if (stage->stage == MESA_SHADER_ANY_HIT)
+            max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
+         else if (stage->stage == MESA_SHADER_INTERSECTION)
+            max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);
+         else
+            unreachable("invalid non-compiled stage");
+         continue;
+      }
+      struct radv_shader *shader = container_of(pipeline->stages[i].shader, struct radv_shader, base);
+      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, shader);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   uint32_t idx = pipeline->stage_count;
+
+   /* Combined traversal shader */
+   result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
+                                   pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Prolog */
+   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);
+
+   return result;
+}
+
 static VkResult
 radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
 {
@@ -1340,7 +1463,7 @@ sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR defer
       if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
          continue;
 
-      result = radv_register_pipeline(device, pipeline);
+      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
       if (result != VK_SUCCESS)
          goto fail;
    }
@@ -1364,7 +1487,17 @@ sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationC
    if (!_pipeline)
       return;
 
-   radv_unregister_records(device, pipeline->pipeline_hash);
+   /* Ray tracing pipelines have multiple records, each with their own hash */
+   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
+      /* We have one record for each stage, plus one for the traversal shader and one for the prolog */
+      uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
+      unsigned char sha1[SHA1_DIGEST_LENGTH];
+      for (uint32_t i = 0; i < record_count; ++i) {
+         compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
+         radv_unregister_records(device, *(uint64_t *)sha1);
+      }
+   } else
+      radv_unregister_records(device, pipeline->pipeline_hash);
 
    if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
       struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);