From c89ca3b47f11ce2c2e6953d37590021e89c1d119 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 23 Aug 2022 19:23:09 -0400 Subject: [PATCH] radeonsi: change si_emit_derived_tess_state into a state atom This splits the state into an update function and an emit function setting the registers, and only 2 functions update it: set_patch_vertices and si_update_shaders. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 6 ++ src/gallium/drivers/radeonsi/si_state.h | 2 + src/gallium/drivers/radeonsi/si_state_draw.cpp | 106 ++++++++++++++-------- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 2 + 5 files changed, 80 insertions(+), 37 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 51b6cce..5a83b1f 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -520,6 +520,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); si_mark_atom_dirty(ctx, &ctx->atoms.s.vgt_pipeline_state); + si_mark_atom_dirty(ctx, &ctx->atoms.s.tess_io_layout); if (has_clear_state) { si_set_tracked_regs_to_clear_state(ctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8367a05..d520e0d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1187,6 +1187,12 @@ struct si_context { unsigned last_tes_sh_base; bool last_tess_uses_primid; unsigned num_patches_per_workgroup; + unsigned tcs_out_layout; + unsigned tcs_out_offsets; + unsigned tcs_offchip_layout; + unsigned tes_offchip_ring_va_sgpr; + unsigned ls_hs_rsrc2; + unsigned ls_hs_config; unsigned last_ls_hs_config; /* Debug state. */ diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 86a59b7..789cf1e 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -210,6 +210,7 @@ union si_state_atoms { struct si_atom shader_query; struct si_atom ngg_cull_state; struct si_atom vgt_pipeline_state; + struct si_atom tess_io_layout; } s; struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)]; }; @@ -586,6 +587,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx); bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes); unsigned si_get_shader_prefetch_size(struct si_shader *shader); bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx); +void si_update_tess_io_layout_state(struct si_context *sctx); /* si_state_draw.cpp */ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index cc80c9c..7660f3b 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -295,6 +295,9 @@ static bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_locations); } + if (HAS_TESS) + si_update_tess_io_layout_state(sctx); + if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt)) { /* Pretend the bound shaders form a vk pipeline. Include the scratch size in * the hash calculation to force re-emitting the pipeline if the scratch bo @@ -626,14 +629,23 @@ static void si_prefetch_shaders(struct si_context *sctx) sctx->prefetch_L2_mask = 0; } +#if GFX_VER == 6 /* declare these functions only once because they support all chips. */ + /** * This calculates the LDS size for tessellation shaders (VS, TCS, TES). * LS.LDS_SIZE is shared by all 3 shader stages. * * The information about LDS and other non-compile-time parameters is then * written to userdata SGPRs. + * + * This depends on: + * - patch_vertices + * - VS and the currently selected shader variant (called by si_update_shaders) + * - TCS and the currently selected shader variant (called by si_update_shaders) + * - tess_uses_prim_id (called by si_update_shaders) + * - sh_base[TESS_EVAL] depending on GS on/off (called by si_update_shaders) */ -static void si_emit_derived_tess_state(struct si_context *sctx) +void si_update_tess_io_layout_state(struct si_context *sctx) { struct si_shader *ls_current; struct si_shader_selector *ls; @@ -643,6 +655,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx) unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; uint8_t num_tcs_input_cp = sctx->patch_vertices; + assert(sctx->shader.tcs.current); + /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */ if (sctx->gfx_level >= GFX9) { ls_current = sctx->shader.tcs.current; @@ -782,9 +796,10 @@ static void si_emit_derived_tess_state(struct si_context *sctx) si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address; assert((ring_va & u_bit_consecutive(0, 19)) == 0); - unsigned tcs_out_layout = (num_tcs_input_cp << 13) | ring_va; - unsigned tcs_out_offsets = ((perpatch_output_offset / 4) << 16); - unsigned offchip_layout = + sctx->tes_offchip_ring_va_sgpr = ring_va; + sctx->tcs_out_layout = (num_tcs_input_cp << 13) | ring_va; + sctx->tcs_out_offsets = ((perpatch_output_offset / 4) << 16); + sctx->tcs_offchip_layout = (num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((pervertex_output_patch_size * num_patches) << 11); @@ -807,70 +822,87 @@ static void si_emit_derived_tess_state(struct si_context *sctx) * been tested. */ assert(ls_current->config.lds_size == 0); - struct radeon_cmdbuf *cs = &sctx->gfx_cs; - radeon_begin(cs); + unsigned ls_hs_rsrc2; if (sctx->gfx_level >= GFX9) { - unsigned hs_rsrc2 = ls_current->config.rsrc2; + ls_hs_rsrc2 = sctx->shader.tcs.current->config.rsrc2; if (sctx->gfx_level >= GFX10) - hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(lds_size); + ls_hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(lds_size); else - hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(lds_size); + ls_hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(lds_size); + } else { + ls_hs_rsrc2 = sctx->shader.vs.current->config.rsrc2; + + si_multiwave_lds_size_workaround(sctx->screen, &lds_size); + ls_hs_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); + } + + sctx->ls_hs_rsrc2 = ls_hs_rsrc2; + sctx->ls_hs_config = + S_028B58_NUM_PATCHES(sctx->num_patches_per_workgroup) | + S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | + S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); + + si_mark_atom_dirty(sctx, &sctx->atoms.s.tess_io_layout); +} + +static void si_emit_tess_io_layout_state(struct si_context *sctx) +{ + struct radeon_cmdbuf *cs = &sctx->gfx_cs; + radeon_begin(cs); - radeon_set_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2); + if (!sctx->shader.tes.cso || !sctx->shader.tcs.current) + return; + + if (sctx->gfx_level >= GFX9) { + radeon_set_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2); /* Set userdata SGPRs for merged LS-HS. */ radeon_set_sh_reg_seq( R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3); - radeon_emit(offchip_layout); - radeon_emit(tcs_out_offsets); - radeon_emit(tcs_out_layout); + radeon_emit(sctx->tcs_offchip_layout); + radeon_emit(sctx->tcs_out_offsets); + radeon_emit(sctx->tcs_out_layout); } else { - unsigned ls_rsrc2 = ls_current->config.rsrc2; - - si_multiwave_lds_size_workaround(sctx->screen, &lds_size); - ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); - /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ if (sctx->gfx_level == GFX7 && sctx->family != CHIP_HAWAII) - radeon_set_sh_reg(R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); + radeon_set_sh_reg(R_00B52C_SPI_SHADER_PGM_RSRC2_LS, sctx->ls_hs_rsrc2); radeon_set_sh_reg_seq(R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); - radeon_emit(ls_current->config.rsrc1); - radeon_emit(ls_rsrc2); + radeon_emit(sctx->shader.vs.current->config.rsrc1); + radeon_emit(sctx->ls_hs_rsrc2); /* Set userdata SGPRs for TCS. */ radeon_set_sh_reg_seq( R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); - radeon_emit(offchip_layout); - radeon_emit(tcs_out_offsets); - radeon_emit(tcs_out_layout); + radeon_emit(sctx->tcs_offchip_layout); + radeon_emit(sctx->tcs_out_offsets); + radeon_emit(sctx->tcs_out_layout); radeon_emit(sctx->current_vs_state); } /* Set userdata SGPRs for TES. */ + unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; + assert(tes_sh_base); + radeon_set_sh_reg_seq(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2); - radeon_emit(offchip_layout); - radeon_emit(ring_va); + radeon_emit(sctx->tcs_offchip_layout); + radeon_emit(sctx->tes_offchip_ring_va_sgpr); radeon_end(); - unsigned ls_hs_config = - S_028B58_NUM_PATCHES(num_patches) | - S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | - S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); - - if (sctx->last_ls_hs_config != ls_hs_config) { + if (sctx->last_ls_hs_config != sctx->ls_hs_config) { radeon_begin(cs); if (sctx->gfx_level >= GFX7) { - radeon_set_context_reg_idx(R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); + radeon_set_context_reg_idx(R_028B58_VGT_LS_HS_CONFIG, 2, sctx->ls_hs_config); } else { - radeon_set_context_reg(R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); + radeon_set_context_reg(R_028B58_VGT_LS_HS_CONFIG, sctx->ls_hs_config); } radeon_end_update_context_roll(sctx); - sctx->last_ls_hs_config = ls_hs_config; + sctx->last_ls_hs_config = sctx->ls_hs_config; } } +#endif static unsigned si_num_prims_for_vertices(enum mesa_prim prim, unsigned count, unsigned vertices_per_patch) @@ -2128,8 +2160,6 @@ ALWAYS_INLINE static void si_emit_all_states(struct si_context *sctx, unsigned skip_atom_mask) { si_emit_rasterizer_prim_state(sctx); - if (HAS_TESS) - si_emit_derived_tess_state(sctx); /* Emit state atoms. */ unsigned mask = sctx->dirty_atoms & ~skip_atom_mask; @@ -2728,6 +2758,8 @@ void si_init_spi_map_functions(struct si_context *sctx) sctx->emit_spi_map[30] = si_emit_spi_map<30>; sctx->emit_spi_map[31] = si_emit_spi_map<31>; sctx->emit_spi_map[32] = si_emit_spi_map<32>; + + sctx->atoms.s.tess_io_layout.emit = si_emit_tess_io_layout_state; } #endif diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 927bc83..270910e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4267,6 +4267,8 @@ static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertic if (sctx->patch_vertices != patch_vertices) { sctx->patch_vertices = patch_vertices; si_update_tess_in_out_patch_vertices(sctx); + if (sctx->shader.tcs.current) + si_update_tess_io_layout_state(sctx); } } -- 2.7.4