uint64_t inline_push_const_mask;
struct ac_arg view_index;
struct ac_arg sbt_descriptors;
- struct ac_arg ray_launch_size;
+ struct ac_arg ray_launch_size_addr;
struct ac_arg force_vrs_rates;
};
emit_split_vector(ctx, dst, 3);
break;
}
- case nir_intrinsic_load_ray_launch_size: {
+ case nir_intrinsic_load_ray_launch_size_addr_amd: {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
- bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.ray_launch_size)));
- emit_split_vector(ctx, dst, 3);
+ Temp addr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.ray_launch_size_addr));
+ bld.copy(Definition(dst), Operand(addr));
break;
}
case nir_intrinsic_load_local_invocation_id: {
case nir_intrinsic_load_push_constant:
case nir_intrinsic_load_workgroup_id:
case nir_intrinsic_load_num_workgroups:
- case nir_intrinsic_load_ray_launch_size:
+ case nir_intrinsic_load_ray_launch_size_addr_amd:
case nir_intrinsic_load_subgroup_id:
case nir_intrinsic_load_num_subgroups:
case nir_intrinsic_load_first_vertex:
}
static bool
-radv_rt_bind_tables(struct radv_cmd_buffer *cmd_buffer,
- const VkStridedDeviceAddressRegionKHR *tables)
+radv_rt_set_args(struct radv_cmd_buffer *cmd_buffer,
+ const VkStridedDeviceAddressRegionKHR *tables, uint64_t launch_size_va,
+ struct radv_dispatch_info *info)
{
struct radv_pipeline *pipeline = cmd_buffer->state.rt_pipeline;
- uint32_t base_reg;
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_COMPUTE];
void *ptr;
- uint32_t *desc_ptr;
+ uint32_t *write_ptr;
uint32_t offset;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, 64, &offset, &ptr))
+ info->unaligned = true;
+
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, 64 + (launch_size_va ? 0 : 12), &offset, &ptr))
return false;
- desc_ptr = ptr;
- for (unsigned i = 0; i < 4; ++i, desc_ptr += 4) {
- desc_ptr[0] = tables[i].deviceAddress;
- desc_ptr[1] = tables[i].deviceAddress >> 32;
- desc_ptr[2] = tables[i].stride;
- desc_ptr[3] = 0;
+ write_ptr = ptr;
+ for (unsigned i = 0; i < 4; ++i, write_ptr += 4) {
+ write_ptr[0] = tables[i].deviceAddress;
+ write_ptr[1] = tables[i].deviceAddress >> 32;
+ write_ptr[2] = tables[i].stride;
+ write_ptr[3] = 0;
+ }
+
+ if (!launch_size_va) {
+ write_ptr[0] = info->blocks[0];
+ write_ptr[1] = info->blocks[1];
+ write_ptr[2] = info->blocks[2];
+ } else {
+ info->va = launch_size_va;
}
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
- struct radv_userdata_info *loc =
+
+ struct radv_userdata_info *desc_loc =
radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_SBT_DESCRIPTORS);
- if (loc->sgpr_idx == -1)
- return true;
+ if (desc_loc->sgpr_idx != -1) {
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
+ base_reg + desc_loc->sgpr_idx * 4, va, false);
+ }
- base_reg = pipeline->user_data_0[MESA_SHADER_COMPUTE];
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
- false);
+ struct radv_userdata_info *size_loc =
+ radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
+ if (size_loc->sgpr_idx != -1) {
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
+ base_reg + size_loc->sgpr_idx * 4, launch_size_va ? launch_size_va : (va + 64), false);
+ }
+
return true;
}
info.blocks[0] = width;
info.blocks[1] = height;
info.blocks[2] = depth;
- info.unaligned = 1;
const VkStridedDeviceAddressRegionKHR tables[] = {
   *pRaygenShaderBindingTable,
   *pMissShaderBindingTable,
   *pHitShaderBindingTable,
   *pCallableShaderBindingTable,
};
- if (!radv_rt_bind_tables(cmd_buffer, tables)) {
+ if (!radv_rt_set_args(cmd_buffer, tables, 0, &info))
return;
- }
-
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(
- cmd_buffer->state.rt_pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_LAUNCH_SIZE);
-
- if (loc->sgpr_idx != -1) {
- assert(loc->num_sgprs == 3);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
- radeon_emit(cmd_buffer->cs, width);
- radeon_emit(cmd_buffer->cs, height);
- radeon_emit(cmd_buffer->cs, depth);
- }
radv_rt_dispatch(cmd_buffer, &info);
}
nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
break;
}
+ case nir_intrinsic_load_ray_launch_size: {
+ b_shader.cursor = nir_instr_remove(instr);
+ nir_ssa_def *launch_size_addr =
+ nir_load_ray_launch_size_addr_amd(&b_shader);
+
+ nir_ssa_def * xy = nir_build_load_smem_amd(
+ &b_shader, 2, launch_size_addr, nir_imm_int(&b_shader, 0));
+ nir_ssa_def * z = nir_build_load_smem_amd(
+ &b_shader, 1, launch_size_addr, nir_imm_int(&b_shader, 8));
+
+ nir_ssa_def *xyz[3] = {
+ nir_channel(&b_shader, xy, 0),
+ nir_channel(&b_shader, xy, 1),
+ z,
+ };
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(&b_shader, xyz, 3));
+ break;
+ }
case nir_intrinsic_load_ray_t_min: {
b_shader.cursor = nir_instr_remove(instr);
nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmin);
AC_UD_PS_MAX_UD,
AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
AC_UD_CS_SBT_DESCRIPTORS,
- AC_UD_CS_RAY_LAUNCH_SIZE,
+ AC_UD_CS_RAY_LAUNCH_SIZE_ADDR,
AC_UD_CS_TASK_RING_OFFSETS,
AC_UD_CS_TASK_DRAW_ID,
AC_UD_CS_TASK_IB,
if (info->cs.uses_grid_size)
user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
if (info->cs.uses_ray_launch_size)
- user_sgpr_count += 3;
+ user_sgpr_count++;
if (info->vs.needs_draw_id)
user_sgpr_count += 1;
if (info->cs.uses_task_rings)
}
if (info->cs.uses_ray_launch_size) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr);
}
if (info->vs.needs_draw_id) {
set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx,
args->load_grid_size_from_user_sgpr ? 3 : 2);
}
- if (args->ac.ray_launch_size.used) {
- set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
+ if (args->ac.ray_launch_size_addr.used) {
+ set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx);
}
if (args->ac.draw_id.used) {
set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1);
case MESA_SHADER_TASK:
for (int i = 0; i < 3; ++i)
info->cs.block_size[i] = nir->info.workgroup_size[i];
- info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE);
+ info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD);
/* Task shaders always need these for the I/O lowering even if
* the API shader doesn't actually use them.