From: Marek Olšák Date: Sun, 16 Jul 2023 21:46:12 +0000 (-0400) Subject: radeonsi: move GE_CNTL emission from si_draw into si_emit_vgt_pipeline_state X-Git-Tag: upstream/23.3.3~3149 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=eb90fffa584e33b6c6169d0368111139a9812972;p=platform%2Fupstream%2Fmesa.git radeonsi: move GE_CNTL emission from si_draw into si_emit_vgt_pipeline_state It doesn't depend on pipe_draw_info since pipe_context::set_patch_vertices was added. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 656972d..a6bd3a7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1071,6 +1071,7 @@ struct si_context { bool cs_preamble_has_vgt_flush; bool cs_preamble_has_vgt_flush_tmz; uint32_t vgt_shader_stages_en; + uint32_t ge_cntl; /* shaders */ union { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index fab81d5..2585e69 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -183,8 +183,56 @@ static bool si_update_shaders(struct si_context *sctx) si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32); } - if (vgt_stages != sctx->vgt_shader_stages_en) { + /* Update GE_CNTL. */ + uint32_t ge_cntl = 0; + + if (GFX_VERSION >= GFX10) { + union si_vgt_param_key key = sctx->ia_multi_vgt_param_key; + + if (NGG) { + if (HAS_TESS) { + if (GFX_VERSION >= GFX11) { + ge_cntl = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ge_cntl | + S_03096C_BREAK_PRIMGRP_AT_EOI(key.u.tess_uses_prim_id); + } else { + /* PRIM_GRP_SIZE_GFX10 is set by si_emit_vgt_pipeline_state. */ + ge_cntl = S_03096C_VERT_GRP_SIZE(0) | + S_03096C_BREAK_WAVE_AT_EOI(key.u.tess_uses_prim_id); + } + } else { + ge_cntl = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ge_cntl; + } + } else { + unsigned primgroup_size; + unsigned vertgroup_size; + assert(GFX_VERSION < GFX11); + + if (HAS_TESS) { + primgroup_size = 0; /* this is set by si_emit_vgt_pipeline_state */ + vertgroup_size = 0; + } else if (HAS_GS) { + unsigned vgt_gs_onchip_cntl = sctx->shader.gs.current->gs.vgt_gs_onchip_cntl; + primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl); + vertgroup_size = G_028A44_ES_VERTS_PER_SUBGRP(vgt_gs_onchip_cntl); + } else { + primgroup_size = 128; /* recommended without a GS and tess */ + vertgroup_size = 0; + } + + ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | + S_03096C_VERT_GRP_SIZE(vertgroup_size) | + S_03096C_BREAK_WAVE_AT_EOI(key.u.uses_tess && key.u.tess_uses_prim_id); + } + + /* Note: GE_CNTL.PACKET_TO_ONE_PA should only be set if LINE_STIPPLE_TEX_ENA == 1. + * Since we don't use that, we don't have to do anything. + */ + } + + if (vgt_stages != sctx->vgt_shader_stages_en || + (GFX_VERSION >= GFX10 && ge_cntl != sctx->ge_cntl)) { sctx->vgt_shader_stages_en = vgt_stages; + sctx->ge_cntl = ge_cntl; si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state); } @@ -1010,59 +1058,6 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx, radeon_end(); } -/* GFX10 removed IA_MULTI_VGT_PARAM in exchange for GE_CNTL. - * We overload last_multi_vgt_param. - */ -template ALWAYS_INLINE -static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches) -{ - union si_vgt_param_key key = sctx->ia_multi_vgt_param_key; - unsigned ge_cntl; - - if (NGG) { - if (HAS_TESS) { - if (GFX_VERSION >= GFX11) { - ge_cntl = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ge_cntl | - S_03096C_BREAK_PRIMGRP_AT_EOI(key.u.tess_uses_prim_id); - } else { - ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(num_patches) | - S_03096C_VERT_GRP_SIZE(0) | - S_03096C_BREAK_WAVE_AT_EOI(key.u.tess_uses_prim_id); - } - } else { - ge_cntl = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ge_cntl; - } - } else { - unsigned primgroup_size; - unsigned vertgroup_size; - assert(GFX_VERSION < GFX11); - - if (HAS_TESS) { - primgroup_size = num_patches; /* must be a multiple of NUM_PATCHES */ - vertgroup_size = 0; - } else if (HAS_GS) { - unsigned vgt_gs_onchip_cntl = sctx->shader.gs.current->gs.vgt_gs_onchip_cntl; - primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl); - vertgroup_size = G_028A44_ES_VERTS_PER_SUBGRP(vgt_gs_onchip_cntl); - } else { - primgroup_size = 128; /* recommended without a GS and tess */ - vertgroup_size = 0; - } - - ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | - S_03096C_VERT_GRP_SIZE(vertgroup_size) | - S_03096C_BREAK_WAVE_AT_EOI(key.u.uses_tess && key.u.tess_uses_prim_id); - } - - /* Note: GE_CNTL.PACKET_TO_ONE_PA should only be set if LINE_STIPPLE_TEX_ENA == 1. - * Since we don't use that, we don't have to do anything. - */ - - radeon_begin(&sctx->gfx_cs); - radeon_opt_set_uconfig_reg(sctx, R_03096C_GE_CNTL, SI_TRACKED_GE_CNTL, ge_cntl); - radeon_end(); -} - template ALWAYS_INLINE static void si_emit_draw_registers(struct si_context *sctx, @@ -1074,12 +1069,11 @@ static void si_emit_draw_registers(struct si_context *sctx, struct radeon_cmdbuf *cs = &sctx->gfx_cs; unsigned num_patches = HAS_TESS ? sctx->num_patches_per_workgroup : 0; - if (GFX_VERSION >= GFX10) - gfx10_emit_ge_cntl(sctx, num_patches); - else + if (GFX_VERSION <= GFX9) { si_emit_ia_multi_vgt_param (sctx, indirect, prim, num_patches, instance_count, primitive_restart, min_vertex_count); + } radeon_begin(cs); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 9496b52..395c980 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4173,6 +4173,19 @@ static void si_emit_vgt_pipeline_state(struct si_context *sctx, unsigned index) radeon_opt_set_context_reg(sctx, R_028B54_VGT_SHADER_STAGES_EN, SI_TRACKED_VGT_SHADER_STAGES_EN, sctx->vgt_shader_stages_en); radeon_end_update_context_roll(sctx); + + if (sctx->gfx_level >= GFX10) { + uint32_t ge_cntl = sctx->ge_cntl; + + if (sctx->gfx_level < GFX11 && sctx->shader.tes.cso) { + /* This must be a multiple of VGT_LS_HS_CONFIG.NUM_PATCHES. */ + ge_cntl |= S_03096C_PRIM_GRP_SIZE_GFX10(sctx->num_patches_per_workgroup); + } + + radeon_begin_again(cs); + radeon_opt_set_uconfig_reg(sctx, R_03096C_GE_CNTL, SI_TRACKED_GE_CNTL, ge_cntl); + radeon_end(); + } } static void si_emit_scratch_state(struct si_context *sctx, unsigned index) @@ -4446,7 +4459,10 @@ void si_update_tess_io_layout_state(struct si_context *sctx) if (has_primid_instancing_bug && tess_uses_primid) num_patches = 1; - sctx->num_patches_per_workgroup = num_patches; + if (sctx->num_patches_per_workgroup != num_patches) { + sctx->num_patches_per_workgroup = num_patches; + si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state); + } unsigned output_patch0_offset = input_patch_size * num_patches; unsigned perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;