From: Marek Olšák Date: Fri, 12 May 2023 21:10:30 +0000 (-0400) Subject: radeonsi: completely rewrite how VGT_SHADER_STAGES_EN is set X-Git-Tag: upstream/23.3.3~7542 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5f2779f62ade7b142165c33cb093ec67162db2ee;p=platform%2Fupstream%2Fmesa.git radeonsi: completely rewrite how VGT_SHADER_STAGES_EN is set Use a state atom with an emit function instead of precomputing up to 256 pm4 states in si_context. Some register fields are precomputed in si_shader for NGG. Others are set in si_update_shaders. Acked-by: Qiang Yu Part-of: --- diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index a7441e2..3aa9df7 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -292,6 +292,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */ + ctx->tracked_regs.reg_value[SI_TRACKED_VGT_SHADER_STAGES_EN] = 0; /* Set all cleared context registers to saved. */ ctx->tracked_regs.reg_saved = BITFIELD64_MASK(SI_TRACKED_GE_PC_ALLOC); @@ -498,6 +499,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); + si_mark_atom_dirty(ctx, &ctx->atoms.s.vgt_pipeline_state); if (has_clear_state) { si_set_tracked_regs_to_clear_state(ctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index cd499ce..0b245fa 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -181,7 +181,6 @@ static void decref_implicit_resource(struct hash_entry *entry) static void si_destroy_context(struct pipe_context *context) { struct si_context *sctx = (struct si_context *)context; - int i; /* Unreference the framebuffer normally to disable related logic * properly. @@ -225,9 +224,6 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->cs_preamble_state_tmz) si_pm4_free_state(sctx, sctx->cs_preamble_state_tmz, ~0); - for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++) - si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config)); - if (sctx->fixed_func_tcs_shader_cache) { hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) { sctx->b.delete_tcs_state(&sctx->b, entry->data); @@ -1465,7 +1461,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, ac_print_shadowed_regs(&sscreen->info); - STATIC_ASSERT(sizeof(union si_vgt_stages_key) == 1); return &sscreen->b; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 795f3e5..7b08160 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1048,8 +1048,7 @@ struct si_context { uint16_t gs_ring_state_dw_offset_tmz; bool cs_preamble_has_vgt_flush; bool cs_preamble_has_vgt_flush_tmz; - - struct si_pm4_state *vgt_shader_config[SI_NUM_VGT_STAGES_STATES]; + uint32_t vgt_shader_stages_en; /* shaders */ union { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1785a04..3a3adc2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -861,37 +861,6 @@ struct gfx9_gs_info { unsigned esgs_ring_size; /* in bytes */ }; -#define SI_NUM_VGT_STAGES_KEY_BITS 8 -#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS) - -/* The VGT_SHADER_STAGES key used to index the table of precomputed values. - * Some fields are set by state-change calls, most are set by draw_vbo. - */ -union si_vgt_stages_key { - struct { -#if UTIL_ARCH_LITTLE_ENDIAN - uint8_t tess : 1; - uint8_t gs : 1; - uint8_t ngg_passthrough : 1; - uint8_t ngg : 1; /* gfx10+ */ - uint8_t streamout : 1; /* only used with NGG */ - uint8_t hs_wave32 : 1; - uint8_t gs_wave32 : 1; - uint8_t vs_wave32 : 1; -#else /* UTIL_ARCH_BIG_ENDIAN */ - uint8_t vs_wave32 : 1; - uint8_t gs_wave32 : 1; - uint8_t hs_wave32 : 1; - uint8_t streamout : 1; - uint8_t ngg : 1; - uint8_t ngg_passthrough : 1; - uint8_t gs : 1; - uint8_t tess : 1; -#endif - } u; - uint8_t index; -}; - struct si_shader { struct si_pm4_state pm4; /* base class */ struct si_compiler_ctx_state compiler_ctx_state; @@ -985,7 +954,7 @@ struct si_shader { unsigned ge_pc_alloc; /* uconfig register */ unsigned spi_shader_pgm_rsrc3_gs; unsigned spi_shader_pgm_rsrc4_gs; - union si_vgt_stages_key vgt_stages; + unsigned vgt_shader_stages_en; } ngg; struct { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 0470d46..8020ced 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -166,7 +166,6 @@ union si_state { struct si_shader *hs; struct si_shader *es; struct si_shader *gs; - struct si_pm4_state *vgt_shader_config; struct si_shader *vs; struct si_shader *ps; struct si_sqtt_fake_pipeline *sqtt_pipeline; @@ -181,7 +180,7 @@ union si_state { static inline unsigned si_states_that_always_roll_context(void) { return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) | - SI_STATE_BIT(poly_offset) | SI_STATE_BIT(vgt_shader_config)); + SI_STATE_BIT(poly_offset)); } union si_state_atoms { @@ -210,6 +209,7 @@ union si_state_atoms { struct si_atom window_rectangles; struct si_atom shader_query; struct si_atom ngg_cull_state; + struct si_atom vgt_pipeline_state; } s; struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)]; }; @@ -314,6 +314,7 @@ enum si_tracked_reg SI_TRACKED_CB_SHADER_MASK, SI_TRACKED_VGT_TF_PARAM, SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, + SI_TRACKED_VGT_SHADER_STAGES_EN, /* Non-context registers: */ SI_TRACKED_GE_PC_ALLOC, @@ -543,7 +544,6 @@ struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_ void si_emit_dpbb_state(struct si_context *sctx); /* si_state_shaders.cpp */ -struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union si_vgt_stages_key key); void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, unsigned wave_size, unsigned char ir_sha1_cache_key[20]); bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_sha1_cache_key[20], diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 8aa2d44..e2a39ef 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -206,32 +206,42 @@ static bool si_update_shaders(struct si_context *sctx) else sctx->vs_uses_base_instance = sctx->shader.vs.current->uses_base_instance; - union si_vgt_stages_key key; - key.index = 0; - /* Update VGT_SHADER_STAGES_EN. */ + uint32_t vgt_stages = 0; + if (HAS_TESS) { - key.u.tess = 1; - if (GFX_VERSION >= GFX10) - key.u.hs_wave32 = sctx->queued.named.hs->wave_size == 32; + vgt_stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | + S_028B54_HS_EN(1) | + S_028B54_DYNAMIC_HS(1) | + S_028B54_HS_W32_EN(GFX_VERSION >= GFX10 && + sctx->queued.named.hs->wave_size == 32); } - if (HAS_GS) - key.u.gs = 1; + if (NGG) { - key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ngg.vgt_stages.index; - } else if (GFX_VERSION >= GFX10) { + vgt_stages |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ngg.vgt_shader_stages_en; + } else { if (HAS_GS) { - key.u.gs_wave32 = sctx->shader.gs.current->wave_size == 32; - key.u.vs_wave32 = sctx->shader.gs.current->gs_copy_shader->wave_size == 32; - } else { - key.u.vs_wave32 = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32; + /* Legacy GS only supports Wave64. */ + assert(sctx->shader.gs.current->wave_size == 64); + + vgt_stages |= S_028B54_ES_EN(HAS_TESS ? V_028B54_ES_STAGE_DS : V_028B54_ES_STAGE_REAL) | + S_028B54_GS_EN(1) | + S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER) | + S_028B54_VS_W32_EN(GFX_VERSION >= GFX10 && + sctx->shader.gs.current->gs_copy_shader->wave_size == 32); + } else if (HAS_TESS) { + vgt_stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); } + + vgt_stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(GFX_VERSION >= GFX9 ? 2 : 0) | + S_028B54_VS_W32_EN(!HAS_GS && GFX_VERSION >= GFX10 && + si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32); } - struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index]; - if (unlikely(!*pm4)) - *pm4 = si_build_vgt_shader_config(sctx->screen, key); - si_pm4_bind_state(sctx, vgt_shader_config, *pm4); + if (vgt_stages != sctx->vgt_shader_stages_en) { + sctx->vgt_shader_stages_en = vgt_stages; + si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state); + } struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 6e1904c..2f28320 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1510,10 +1510,17 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1); } - shader->ngg.vgt_stages.u.ngg = 1; - shader->ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader); - shader->ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader); - shader->ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32; + shader->ngg.vgt_shader_stages_en = + S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ? + V_028B54_ES_STAGE_DS : V_028B54_ES_STAGE_REAL) | + S_028B54_GS_EN(gs_stage == MESA_SHADER_GEOMETRY) | + S_028B54_PRIMGEN_EN(1) | + S_028B54_PRIMGEN_PASSTHRU_EN(gfx10_is_ngg_passthrough(shader)) | + S_028B54_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader) && + sscreen->info.family >= CHIP_NAVI23) | + S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) | + S_028B54_GS_W32_EN(shader->wave_size == 32) | + S_028B54_MAX_PRIMGRP_IN_WAVE(2); } static void si_emit_shader_vs(struct si_context *sctx) @@ -4154,49 +4161,14 @@ void si_init_tess_factor_ring(struct si_context *sctx) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } -struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union si_vgt_stages_key key) +static void si_emit_vgt_pipeline_state(struct si_context *sctx) { - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - uint32_t stages = 0; - - if (key.u.tess) { - stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); - - if (key.u.gs) - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1); - else if (key.u.ngg) - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS); - else - stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); - } else if (key.u.gs) { - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); - } else if (key.u.ngg) { - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); - } - - if (key.u.ngg) { - stages |= S_028B54_PRIMGEN_EN(1) | - S_028B54_NGG_WAVE_ID_EN(key.u.streamout) | - S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough) | - S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key.u.ngg_passthrough && - screen->info.family >= CHIP_NAVI23); - } else if (key.u.gs) { - stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - } - - if (screen->info.gfx_level >= GFX9) - stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); - - if (screen->info.gfx_level >= GFX10) { - stages |= S_028B54_HS_W32_EN(key.u.hs_wave32) | - S_028B54_GS_W32_EN(key.u.gs_wave32) | - S_028B54_VS_W32_EN(screen->info.gfx_level < GFX11 && key.u.vs_wave32); - /* Legacy GS only supports Wave64. Read it as an implication. */ - assert(!(key.u.gs && !key.u.ngg) || !key.u.gs_wave32); - } + struct radeon_cmdbuf *cs = &sctx->gfx_cs; - si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, stages); - return pm4; + radeon_begin(cs); + radeon_opt_set_context_reg(sctx, R_028B54_VGT_SHADER_STAGES_EN, SI_TRACKED_VGT_SHADER_STAGES_EN, + sctx->vgt_shader_stages_en); + radeon_end_update_context_roll(sctx); } static void si_emit_scratch_state(struct si_context *sctx) @@ -4328,6 +4300,7 @@ void si_init_screen_live_shader_cache(struct si_screen *sscreen) void si_init_shader_functions(struct si_context *sctx) { + sctx->atoms.s.vgt_pipeline_state.emit = si_emit_vgt_pipeline_state; sctx->atoms.s.scratch_state.emit = si_emit_scratch_state; sctx->b.create_vs_state = si_create_shader;