? 1024
: pdevice->rad_info.lds_encode_granularity;
+ memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
shader_data->code_size = shader->code_size;
return VK_SUCCESS;
}
+/* Append one RGP code-object record describing a single ray-tracing shader.
+ *
+ * Each RT shader gets its own record (rather than one record per pipeline)
+ * so that every shader shows up individually in the profiler, named after
+ * its stage and stage index.  The record is linked into code_object->record
+ * under code_object->lock.
+ *
+ * NOTE(review): `pipeline` is accepted but never read in this function —
+ * presumably kept for symmetry with the other register helpers; confirm.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the record allocation fails,
+ * VK_SUCCESS otherwise.
+ */
+static VkResult
+radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
+                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
+                   uint32_t index, uint64_t hash)
+{
+   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
+   if (!record)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];
+
+   record->shader_stages_mask = 0;
+   record->num_shaders_combined = 0;
+   /* Both 64-bit halves of the record's pipeline hash carry the same
+    * per-shader hash computed by the caller. */
+   record->pipeline_hash[0] = hash;
+   record->pipeline_hash[1] = hash;
+
+   radv_fill_code_object_record(device, shader_data, shader, shader->va);
+   shader_data->rt_stack_size = stack_size;
+
+   record->shader_stages_mask |= (1 << shader->info.stage);
+   record->is_rt = true;
+   /* Build a unique, human-readable shader name: stage prefix plus stage
+    * index.  The traversal shader (MESA_SHADER_INTERSECTION) and the prolog
+    * (MESA_SHADER_COMPUTE) get fixed names instead. */
+   switch (shader->info.stage) {
+   case MESA_SHADER_RAYGEN:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index);
+      break;
+   case MESA_SHADER_CLOSEST_HIT:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index);
+      break;
+   case MESA_SHADER_MISS:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index);
+      break;
+   case MESA_SHADER_INTERSECTION:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
+      break;
+   case MESA_SHADER_CALLABLE:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index);
+      break;
+   case MESA_SHADER_COMPUTE:
+      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
+      break;
+   default:
+      unreachable("invalid rt stage");
+   }
+   /* Each RT record describes exactly one shader. */
+   record->num_shaders_combined = 1;
+
+   /* The record list is shared across threads; guard the insertion. */
+   simple_mtx_lock(&code_object->lock);
+   list_addtail(&record->list, &code_object->record);
+   code_object->record_count++;
+   simple_mtx_unlock(&code_object->lock);
+
+   return VK_SUCCESS;
+}
+
+/* Derive a unique per-shader hash for SQTT records by hashing the pipeline
+ * hash together with the shader's stage index.  The update order (hash,
+ * then index) is part of the identity and must match the teardown path. */
+static void
+compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
+{
+   struct mesa_sha1 sha_ctx;
+
+   _mesa_sha1_init(&sha_ctx);
+   _mesa_sha1_update(&sha_ctx, &pipeline_hash, sizeof(pipeline_hash));
+   _mesa_sha1_update(&sha_ctx, &index, sizeof(index));
+   _mesa_sha1_final(&sha_ctx, sha1);
+}
+
+/* Register one RT shader with SQTT: add a PSO correlation, a code-object
+ * loader event and a code-object record, all keyed by a unique 64-bit hash
+ * derived from the pipeline hash and the stage index.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY when either ac_sqtt registration
+ * fails, otherwise the result of radv_add_rt_record().
+ */
+static VkResult
+radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
+                       uint32_t stack_size, struct radv_shader *shader)
+{
+   unsigned char sha1[SHA1_DIGEST_LENGTH];
+   uint64_t short_hash;
+
+   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);
+
+   /* Truncate the SHA1 to 64 bits with memcpy: dereferencing the byte
+    * buffer through a uint64_t * would violate strict aliasing and rely on
+    * the buffer being suitably aligned. */
+   memcpy(&short_hash, sha1, sizeof(short_hash));
+
+   /* The ac_sqtt_* helpers return bool (false == out of host memory), so
+    * test them directly instead of funneling them through a VkResult. */
+   if (!ac_sqtt_add_pso_correlation(&device->sqtt, short_hash, pipeline->base.base.pipeline_hash))
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, short_hash, shader->va))
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index, short_hash);
+}
+
+/* Register every shader of a ray-tracing pipeline with SQTT/RGP.
+ *
+ * Compiled stages are registered under their stage-array index.
+ * Non-compiled any-hit and intersection stages have no shader of their
+ * own; only their stack sizes are accumulated and attributed to the
+ * combined traversal shader.  Two extra records follow the stages: the
+ * traversal shader at index stage_count and the prolog at index
+ * stage_count + 1 — the unregister path assumes exactly stage_count + 2
+ * records, so keep the two in sync.
+ *
+ * Returns the first failing VkResult, or VK_SUCCESS.
+ */
+static VkResult
+radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
+{
+   VkResult result = VK_SUCCESS;
+
+   uint32_t max_any_hit_stack_size = 0;
+   uint32_t max_intersection_stack_size = 0;
+
+   for (unsigned i = 0; i < pipeline->stage_count; i++) {
+      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
+      if (!radv_ray_tracing_stage_is_compiled(stage)) {
+         /* Inlined stage: fold its stack size into the traversal shader's. */
+         if (stage->stage == MESA_SHADER_ANY_HIT)
+            max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
+         else if (stage->stage == MESA_SHADER_INTERSECTION)
+            max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);
+         else
+            unreachable("invalid non-compiled stage");
+         continue;
+      }
+      struct radv_shader *shader = container_of(pipeline->stages[i].shader, struct radv_shader, base);
+      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, shader);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   uint32_t idx = pipeline->stage_count;
+
+   /* Combined traversal shader */
+   result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
+                                   pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Prolog */
+   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);
+
+   return result;
+}
+
static VkResult
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
continue;
- result = radv_register_pipeline(device, pipeline);
+ result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
if (result != VK_SUCCESS)
goto fail;
}
if (!_pipeline)
return;
- radv_unregister_records(device, pipeline->pipeline_hash);
+ /* Ray tracing pipelines have multiple records, each with their own hash */
+ if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
+ /* We have one record for each stage, plus one for the traversal shader and one for the prolog */
+ uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
+ unsigned char sha1[SHA1_DIGEST_LENGTH];
+ for (uint32_t i = 0; i < record_count; ++i) {
+ compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
+ radv_unregister_records(device, *(uint64_t *)sha1);
+ }
+ } else
+ radv_unregister_records(device, pipeline->pipeline_hash);
if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);