From 8a97302f57fede4791acdb504bad449508fbfc01 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 7 Aug 2023 14:31:34 +0200 Subject: [PATCH] radv: add support for loading the LSHS vertex stride from a SGPR With shader objects, if the VS and TCS aren't linked together, the LSHS vertex stride should be computed from the vertex outputs. Otherwise, if an output is unused, the stride is wrong in the TCS. This is currently for GFX8 only because this won't be needed for merged shaders, but shader objects on GFX9+ aren't a thing yet. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/nir/radv_nir_lower_abi.c | 14 ++++++++++++-- src/amd/vulkan/radv_cmd_buffer.c | 6 ++++-- src/amd/vulkan/radv_shader.h | 2 ++ src/amd/vulkan/radv_shader_args.c | 4 ++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/nir/radv_nir_lower_abi.c b/src/amd/vulkan/nir/radv_nir_lower_abi.c index 8cb5493..38a58b2 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_abi.c +++ b/src/amd/vulkan/nir/radv_nir_lower_abi.c @@ -273,8 +273,18 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry); break; case nir_intrinsic_load_lshs_vertex_stride_amd: { - unsigned io_num = stage == MESA_SHADER_VERTEX ? 
s->info->vs.num_linked_outputs : s->info->tcs.num_linked_inputs; - replacement = nir_imm_int(b, get_tcs_input_vertex_stride(io_num)); + if (stage == MESA_SHADER_VERTEX) { + replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->vs.num_linked_outputs)); + } else { + assert(stage == MESA_SHADER_TESS_CTRL); + if (s->info->inputs_linked) { + replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs)); + } else { + nir_ssa_def *lshs_vertex_stride = + GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE); + replacement = nir_ishl_imm(b, lshs_vertex_stride, 2); + } + } break; } case nir_intrinsic_load_esgs_vertex_stride_amd: { diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ab78075..eab7956 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2507,6 +2507,7 @@ static void radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) { const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; + const struct radv_shader *vs = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned ls_hs_config, base_reg; @@ -2549,7 +2550,6 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2); } else { - struct radv_shader *vs = cmd_buffer->state.shaders[MESA_SHADER_VERTEX]; unsigned ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size); radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); @@ -2564,7 +2564,9 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) unsigned tcs_offchip_layout = SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS, d->vk.ts.patch_control_points) | - 
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, cmd_buffer->state.tess_num_patches); + SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, cmd_buffer->state.tess_num_patches) | + SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE, + get_tcs_input_vertex_stride(vs->info.vs.num_linked_outputs) / 4); base_reg = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]->info.user_data_0; radeon_set_sh_reg(cmd_buffer->cs, base_reg + offchip->sgpr_idx * 4, tcs_offchip_layout); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 042cab6..b1d2cd6 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -201,6 +201,8 @@ enum radv_ud_index { #define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f #define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6 #define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff +#define TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE__SHIFT 14 +#define TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE__MASK 0xff /* max 32 * 4 + 1 (to reduce LDS bank conflicts) */ #define TES_STATE_NUM_PATCHES__SHIFT 0 #define TES_STATE_NUM_PATCHES__MASK 0xff diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 97ef4c7..3d0eea2 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -355,8 +355,8 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg static bool radv_tcs_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_pipeline_key *key) { - /* When the number of patch control points/tessellation patches is 0, it's loaded from a SGPR. */ - return !key->tcs.tess_input_vertices || !info->num_tess_patches; + /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */ + return !key->tcs.tess_input_vertices || !info->num_tess_patches || !info->inputs_linked; } static bool -- 2.7.4