struct ac_arg sbt_descriptors;
struct ac_arg ray_launch_size_addr;
struct ac_arg force_vrs_rates;
+ struct ac_arg rt_dynamic_callable_stack_base;
};
void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned registers,
break;
}
case nir_intrinsic_bvh64_intersect_ray_amd: visit_bvh64_intersect_ray_amd(ctx, instr); break;
+ case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd:
+ bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
+ get_arg(ctx, ctx->args->ac.rt_dynamic_callable_stack_base));
+ break;
case nir_intrinsic_overwrite_vs_arguments_amd: {
ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
base_reg + size_loc->sgpr_idx * 4, launch_size_va, true);
}
+ struct radv_userdata_info *base_loc = radv_lookup_user_sgpr(
+ &pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);
+ if (base_loc->sgpr_idx != -1) {
+ struct radv_shader_info *cs_info = &pipeline->base.shaders[MESA_SHADER_COMPUTE]->info;
+ radeon_set_sh_reg(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + base_loc->sgpr_idx * 4,
+ pipeline->base.scratch_bytes_per_wave / cs_info->wave_size);
+ }
+
radv_dispatch(cmd_buffer, &info, pipeline, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
}
struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, stack_sizes);
load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0);
- nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
+ if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
+ nir_store_var(&b, vars.stack_ptr, nir_load_rt_dynamic_callable_stack_base_amd(&b), 0x1);
+ else
+ nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1);
AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
AC_UD_CS_SBT_DESCRIPTORS,
AC_UD_CS_RAY_LAUNCH_SIZE_ADDR,
+ AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE,
AC_UD_CS_TASK_RING_OFFSETS,
AC_UD_CS_TASK_DRAW_ID,
AC_UD_CS_TASK_IB,
bool uses_sbt;
bool uses_ray_launch_size;
+ bool uses_dynamic_rt_callable_stack;
} cs;
struct {
uint64_t tes_inputs_read;
user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
if (info->cs.uses_ray_launch_size)
user_sgpr_count += 2;
+ if (info->cs.uses_dynamic_rt_callable_stack)
+ user_sgpr_count += 1;
if (info->vs.needs_draw_id)
user_sgpr_count += 1;
if (stage == MESA_SHADER_TASK)
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr);
}
+ if (info->cs.uses_dynamic_rt_callable_stack) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->ac.rt_dynamic_callable_stack_base);
+ }
+
if (info->vs.needs_draw_id) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
}
if (args->ac.ray_launch_size_addr.used) {
set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx);
}
+ if (args->ac.rt_dynamic_callable_stack_base.used) {
+ set_loc_shader(args, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE, &user_sgpr_idx, 1);
+ }
if (args->ac.draw_id.used) {
set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1);
}
case nir_intrinsic_load_force_vrs_rates_amd:
info->force_vrs_per_vertex = true;
break;
+ case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd:
+ info->cs.uses_dynamic_rt_callable_stack = true;
+ break;
default:
break;
}
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_scalar_arg_amd:
case nir_intrinsic_load_smem_amd:
+ case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd:
case nir_intrinsic_load_global_const_block_intel:
case nir_intrinsic_load_reloc_const_intel:
case nir_intrinsic_load_global_block_intel:
# Used for indirect ray tracing.
system_value("ray_launch_size_addr_amd", 1, bit_sizes=[64])
+# Base of the dynamic callable stack in scratch memory, used for ray tracing.
+system_value("rt_dynamic_callable_stack_base_amd", 1)
+
# Load forced VRS rates.
intrinsic("load_force_vrs_rates_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])