ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */
+ ctx->tracked_regs.reg_value[SI_TRACKED_VGT_SHADER_STAGES_EN] = 0;
/* Set all cleared context registers to saved. */
ctx->tracked_regs.reg_saved = BITFIELD64_MASK(SI_TRACKED_GE_PC_ALLOC);
si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.vgt_pipeline_state);
if (has_clear_state) {
si_set_tracked_regs_to_clear_state(ctx);
static void si_destroy_context(struct pipe_context *context)
{
struct si_context *sctx = (struct si_context *)context;
- int i;
/* Unreference the framebuffer normally to disable related logic
* properly.
if (sctx->cs_preamble_state_tmz)
si_pm4_free_state(sctx, sctx->cs_preamble_state_tmz, ~0);
- for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
- si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
-
if (sctx->fixed_func_tcs_shader_cache) {
hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
sctx->b.delete_tcs_state(&sctx->b, entry->data);
ac_print_shadowed_regs(&sscreen->info);
- STATIC_ASSERT(sizeof(union si_vgt_stages_key) == 1);
return &sscreen->b;
}
uint16_t gs_ring_state_dw_offset_tmz;
bool cs_preamble_has_vgt_flush;
bool cs_preamble_has_vgt_flush_tmz;
-
- struct si_pm4_state *vgt_shader_config[SI_NUM_VGT_STAGES_STATES];
+ uint32_t vgt_shader_stages_en;
/* shaders */
union {
unsigned esgs_ring_size; /* in bytes */
};
/* NOTE(review): deleted together with the vgt_shader_config pm4 table — the
 * driver now stores VGT_SHADER_STAGES_EN directly as a uint32_t
 * (sctx->vgt_shader_stages_en) instead of indexing 1 << 8 precomputed pm4
 * states with this packed key. */
-#define SI_NUM_VGT_STAGES_KEY_BITS 8
-#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
-
-/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
 * Some fields are set by state-change calls, most are set by draw_vbo.
 */
-union si_vgt_stages_key {
-   struct {
-#if UTIL_ARCH_LITTLE_ENDIAN
-      uint8_t tess : 1;
-      uint8_t gs : 1;
-      uint8_t ngg_passthrough : 1;
-      uint8_t ngg : 1; /* gfx10+ */
-      uint8_t streamout : 1; /* only used with NGG */
-      uint8_t hs_wave32 : 1;
-      uint8_t gs_wave32 : 1;
-      uint8_t vs_wave32 : 1;
-#else /* UTIL_ARCH_BIG_ENDIAN */
-      uint8_t vs_wave32 : 1;
-      uint8_t gs_wave32 : 1;
-      uint8_t hs_wave32 : 1;
-      uint8_t streamout : 1;
-      uint8_t ngg : 1;
-      uint8_t ngg_passthrough : 1;
-      uint8_t gs : 1;
-      uint8_t tess : 1;
-#endif
-   } u;
-   uint8_t index;
-};
-
struct si_shader {
struct si_pm4_state pm4; /* base class */
struct si_compiler_ctx_state compiler_ctx_state;
unsigned ge_pc_alloc; /* uconfig register */
unsigned spi_shader_pgm_rsrc3_gs;
unsigned spi_shader_pgm_rsrc4_gs;
- union si_vgt_stages_key vgt_stages;
+ unsigned vgt_shader_stages_en;
} ngg;
struct {
struct si_shader *hs;
struct si_shader *es;
struct si_shader *gs;
- struct si_pm4_state *vgt_shader_config;
struct si_shader *vs;
struct si_shader *ps;
struct si_sqtt_fake_pipeline *sqtt_pipeline;
/* NOTE(review): vgt_shader_config is dropped from this mask because the pm4
 * state is removed by this change; VGT_SHADER_STAGES_EN is now emitted by the
 * new vgt_pipeline_state atom, which handles its own context-roll tracking. */
static inline unsigned si_states_that_always_roll_context(void)
{
   return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) |
-          SI_STATE_BIT(poly_offset) | SI_STATE_BIT(vgt_shader_config));
+          SI_STATE_BIT(poly_offset));
}
union si_state_atoms {
struct si_atom window_rectangles;
struct si_atom shader_query;
struct si_atom ngg_cull_state;
+ struct si_atom vgt_pipeline_state;
} s;
struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)];
};
SI_TRACKED_CB_SHADER_MASK,
SI_TRACKED_VGT_TF_PARAM,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+ SI_TRACKED_VGT_SHADER_STAGES_EN,
/* Non-context registers: */
SI_TRACKED_GE_PC_ALLOC,
void si_emit_dpbb_state(struct si_context *sctx);
/* si_state_shaders.cpp */
-struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union si_vgt_stages_key key);
void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
unsigned wave_size, unsigned char ir_sha1_cache_key[20]);
bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_sha1_cache_key[20],
else
sctx->vs_uses_base_instance = sctx->shader.vs.current->uses_base_instance;
- union si_vgt_stages_key key;
- key.index = 0;
-
/* Update VGT_SHADER_STAGES_EN. */
+ uint32_t vgt_stages = 0;
+
if (HAS_TESS) {
- key.u.tess = 1;
- if (GFX_VERSION >= GFX10)
- key.u.hs_wave32 = sctx->queued.named.hs->wave_size == 32;
+ vgt_stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+ S_028B54_HS_EN(1) |
+ S_028B54_DYNAMIC_HS(1) |
+ S_028B54_HS_W32_EN(GFX_VERSION >= GFX10 &&
+ sctx->queued.named.hs->wave_size == 32);
}
- if (HAS_GS)
- key.u.gs = 1;
+
if (NGG) {
- key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ngg.vgt_stages.index;
- } else if (GFX_VERSION >= GFX10) {
+ vgt_stages |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ngg.vgt_shader_stages_en;
+ } else {
if (HAS_GS) {
- key.u.gs_wave32 = sctx->shader.gs.current->wave_size == 32;
- key.u.vs_wave32 = sctx->shader.gs.current->gs_copy_shader->wave_size == 32;
- } else {
- key.u.vs_wave32 = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32;
+ /* Legacy GS only supports Wave64. */
+ assert(sctx->shader.gs.current->wave_size == 64);
+
+ vgt_stages |= S_028B54_ES_EN(HAS_TESS ? V_028B54_ES_STAGE_DS : V_028B54_ES_STAGE_REAL) |
+ S_028B54_GS_EN(1) |
+ S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER) |
+ S_028B54_VS_W32_EN(GFX_VERSION >= GFX10 &&
+ sctx->shader.gs.current->gs_copy_shader->wave_size == 32);
+ } else if (HAS_TESS) {
+ vgt_stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
}
+
+ vgt_stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(GFX_VERSION >= GFX9 ? 2 : 0) |
+ S_028B54_VS_W32_EN(!HAS_GS && GFX_VERSION >= GFX10 &&
+ si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32);
}
- struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index];
- if (unlikely(!*pm4))
- *pm4 = si_build_vgt_shader_config(sctx->screen, key);
- si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
+ if (vgt_stages != sctx->vgt_shader_stages_en) {
+ sctx->vgt_shader_stages_en = vgt_stages;
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state);
+ }
struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1);
}
- shader->ngg.vgt_stages.u.ngg = 1;
- shader->ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader);
- shader->ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
- shader->ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
+ shader->ngg.vgt_shader_stages_en =
+ S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ?
+ V_028B54_ES_STAGE_DS : V_028B54_ES_STAGE_REAL) |
+ S_028B54_GS_EN(gs_stage == MESA_SHADER_GEOMETRY) |
+ S_028B54_PRIMGEN_EN(1) |
+ S_028B54_PRIMGEN_PASSTHRU_EN(gfx10_is_ngg_passthrough(shader)) |
+ S_028B54_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader) &&
+ sscreen->info.family >= CHIP_NAVI23) |
+ S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) |
+ S_028B54_GS_W32_EN(shader->wave_size == 32) |
+ S_028B54_MAX_PRIMGRP_IN_WAVE(2);
}
static void si_emit_shader_vs(struct si_context *sctx)
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
}
-struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union si_vgt_stages_key key)
+/* Emit VGT_SHADER_STAGES_EN from the value precomputed in
+ * sctx->vgt_shader_stages_en (set in si_update_shaders). This replaces
+ * si_build_vgt_shader_config, which allocated one pm4 state per
+ * si_vgt_stages_key value. The register is tracked via
+ * SI_TRACKED_VGT_SHADER_STAGES_EN, so radeon_opt_set_context_reg presumably
+ * skips the write when the value is unchanged — confirm its semantics against
+ * the tracked-register helpers. */
+static void si_emit_vgt_pipeline_state(struct si_context *sctx)
{
-   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-   uint32_t stages = 0;
-
-   if (key.u.tess) {
-      stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
-
-      if (key.u.gs)
-         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1);
-      else if (key.u.ngg)
-         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
-      else
-         stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
-   } else if (key.u.gs) {
-      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
-   } else if (key.u.ngg) {
-      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
-   }
-
-   if (key.u.ngg) {
-      stages |= S_028B54_PRIMGEN_EN(1) |
-                S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
-                S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough) |
-                S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key.u.ngg_passthrough &&
-                                                 screen->info.family >= CHIP_NAVI23);
-   } else if (key.u.gs) {
-      stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
-   }
-
-   if (screen->info.gfx_level >= GFX9)
-      stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
-
-   if (screen->info.gfx_level >= GFX10) {
-      stages |= S_028B54_HS_W32_EN(key.u.hs_wave32) |
-                S_028B54_GS_W32_EN(key.u.gs_wave32) |
-                S_028B54_VS_W32_EN(screen->info.gfx_level < GFX11 && key.u.vs_wave32);
-      /* Legacy GS only supports Wave64. Read it as an implication. */
-      assert(!(key.u.gs && !key.u.ngg) || !key.u.gs_wave32);
-   }
+   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-   si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
-   return pm4;
+   radeon_begin(cs);
+   radeon_opt_set_context_reg(sctx, R_028B54_VGT_SHADER_STAGES_EN, SI_TRACKED_VGT_SHADER_STAGES_EN,
+                              sctx->vgt_shader_stages_en);
+   radeon_end_update_context_roll(sctx);
}
static void si_emit_scratch_state(struct si_context *sctx)
void si_init_shader_functions(struct si_context *sctx)
{
+ sctx->atoms.s.vgt_pipeline_state.emit = si_emit_vgt_pipeline_state;
sctx->atoms.s.scratch_state.emit = si_emit_scratch_state;
sctx->b.create_vs_state = si_create_shader;