radeonsi: completely rewrite how VGT_SHADER_STAGES_EN is set

author Marek Olšák <marek.olsak@amd.com>

Fri, 12 May 2023 21:10:30 +0000 (17:10 -0400)

committer Marge Bot <emma+marge@anholt.net>

Tue, 6 Jun 2023 18:01:35 +0000 (18:01 +0000)
author Marek Olšák <marek.olsak@amd.com>
Fri, 12 May 2023 21:10:30 +0000 (17:10 -0400)
committer Marge Bot <emma+marge@anholt.net>
Tue, 6 Jun 2023 18:01:35 +0000 (18:01 +0000)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c

index a7441e2..3aa9df7 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -292,6 +292,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
     ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK]  = 0xffffffff;
     ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM]  = 0x00000000;
     ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL]  = 0x0000001e; /* From GFX8 */
+   ctx->tracked_regs.reg_value[SI_TRACKED_VGT_SHADER_STAGES_EN] = 0;
  
     /* Set all cleared context registers to saved. */
     ctx->tracked_regs.reg_saved = BITFIELD64_MASK(SI_TRACKED_GE_PC_ALLOC);
@@ -498,6 +499,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
        si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
        si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
        si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
+      si_mark_atom_dirty(ctx, &ctx->atoms.s.vgt_pipeline_state);
  
        if (has_clear_state) {
           si_set_tracked_regs_to_clear_state(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index cd499ce..0b245fa 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -181,7 +181,6 @@ static void decref_implicit_resource(struct hash_entry *entry)
  static void si_destroy_context(struct pipe_context *context)
  {
     struct si_context *sctx = (struct si_context *)context;
-   int i;
  
     /* Unreference the framebuffer normally to disable related logic
      * properly.
@@ -225,9 +224,6 @@ static void si_destroy_context(struct pipe_context *context)
     if (sctx->cs_preamble_state_tmz)
        si_pm4_free_state(sctx, sctx->cs_preamble_state_tmz, ~0);
  
-   for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
-      si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
-
     if (sctx->fixed_func_tcs_shader_cache) {
        hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
           sctx->b.delete_tcs_state(&sctx->b, entry->data);
@@ -1465,7 +1461,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
  
     ac_print_shadowed_regs(&sscreen->info);
  
-   STATIC_ASSERT(sizeof(union si_vgt_stages_key) == 1);
     return &sscreen->b;
  }
  
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index 795f3e5..7b08160 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1048,8 +1048,7 @@ struct si_context {
     uint16_t gs_ring_state_dw_offset_tmz;
     bool cs_preamble_has_vgt_flush;
     bool cs_preamble_has_vgt_flush_tmz;
-
-   struct si_pm4_state *vgt_shader_config[SI_NUM_VGT_STAGES_STATES];
+   uint32_t vgt_shader_stages_en;
  
     /* shaders */
     union {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h

index 1785a04..3a3adc2 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -861,37 +861,6 @@ struct gfx9_gs_info {
     unsigned esgs_ring_size; /* in bytes */
  };
  
-#define SI_NUM_VGT_STAGES_KEY_BITS 8
-#define SI_NUM_VGT_STAGES_STATES   (1 << SI_NUM_VGT_STAGES_KEY_BITS)
-
-/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
- * Some fields are set by state-change calls, most are set by draw_vbo.
- */
-union si_vgt_stages_key {
-   struct {
-#if UTIL_ARCH_LITTLE_ENDIAN
-      uint8_t tess : 1;
-      uint8_t gs : 1;
-      uint8_t ngg_passthrough : 1;
-      uint8_t ngg : 1;       /* gfx10+ */
-      uint8_t streamout : 1; /* only used with NGG */
-      uint8_t hs_wave32 : 1;
-      uint8_t gs_wave32 : 1;
-      uint8_t vs_wave32 : 1;
-#else /* UTIL_ARCH_BIG_ENDIAN */
-      uint8_t vs_wave32 : 1;
-      uint8_t gs_wave32 : 1;
-      uint8_t hs_wave32 : 1;
-      uint8_t streamout : 1;
-      uint8_t ngg : 1;
-      uint8_t ngg_passthrough : 1;
-      uint8_t gs : 1;
-      uint8_t tess : 1;
-#endif
-   } u;
-   uint8_t index;
-};
-
  struct si_shader {
     struct si_pm4_state pm4; /* base class */
     struct si_compiler_ctx_state compiler_ctx_state;
@@ -985,7 +954,7 @@ struct si_shader {
           unsigned ge_pc_alloc;         /* uconfig register */
           unsigned spi_shader_pgm_rsrc3_gs;
           unsigned spi_shader_pgm_rsrc4_gs;
-         union si_vgt_stages_key vgt_stages;
+         unsigned vgt_shader_stages_en;
        } ngg;
  
        struct {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h

index 0470d46..8020ced 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -166,7 +166,6 @@ union si_state {
        struct si_shader *hs;
        struct si_shader *es;
        struct si_shader *gs;
-      struct si_pm4_state *vgt_shader_config;
        struct si_shader *vs;
        struct si_shader *ps;
        struct si_sqtt_fake_pipeline *sqtt_pipeline;
@@ -181,7 +180,7 @@ union si_state {
  static inline unsigned si_states_that_always_roll_context(void)
  {
     return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) |
-           SI_STATE_BIT(poly_offset) | SI_STATE_BIT(vgt_shader_config));
+           SI_STATE_BIT(poly_offset));
  }
  
  union si_state_atoms {
@@ -210,6 +209,7 @@ union si_state_atoms {
        struct si_atom window_rectangles;
        struct si_atom shader_query;
        struct si_atom ngg_cull_state;
+      struct si_atom vgt_pipeline_state;
     } s;
     struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)];
  };
@@ -314,6 +314,7 @@ enum si_tracked_reg
     SI_TRACKED_CB_SHADER_MASK,
     SI_TRACKED_VGT_TF_PARAM,
     SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+   SI_TRACKED_VGT_SHADER_STAGES_EN,
  
     /* Non-context registers: */
     SI_TRACKED_GE_PC_ALLOC,
@@ -543,7 +544,6 @@ struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_
  void si_emit_dpbb_state(struct si_context *sctx);
  
  /* si_state_shaders.cpp */
-struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union si_vgt_stages_key key);
  void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
                           unsigned wave_size, unsigned char ir_sha1_cache_key[20]);
  bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_sha1_cache_key[20],
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp

index 8aa2d44..e2a39ef 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -206,32 +206,42 @@ static bool si_update_shaders(struct si_context *sctx)
     else
        sctx->vs_uses_base_instance = sctx->shader.vs.current->uses_base_instance;
  
-   union si_vgt_stages_key key;
-   key.index = 0;
-
     /* Update VGT_SHADER_STAGES_EN. */
+   uint32_t vgt_stages = 0;
+
     if (HAS_TESS) {
-      key.u.tess = 1;
-      if (GFX_VERSION >= GFX10)
-         key.u.hs_wave32 = sctx->queued.named.hs->wave_size == 32;
+      vgt_stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+                    S_028B54_HS_EN(1) |
+                    S_028B54_DYNAMIC_HS(1) |
+                    S_028B54_HS_W32_EN(GFX_VERSION >= GFX10 &&
+                                       sctx->queued.named.hs->wave_size == 32);
     }
-   if (HAS_GS)
-      key.u.gs = 1;
+
     if (NGG) {
-      key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ngg.vgt_stages.index;
-   } else if (GFX_VERSION >= GFX10) {
+      vgt_stages |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ngg.vgt_shader_stages_en;
+   } else {
        if (HAS_GS) {
-         key.u.gs_wave32 = sctx->shader.gs.current->wave_size == 32;
-         key.u.vs_wave32 = sctx->shader.gs.current->gs_copy_shader->wave_size == 32;
-      } else {
-         key.u.vs_wave32 = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32;
+         /* Legacy GS only supports Wave64. */
+         assert(sctx->shader.gs.current->wave_size == 64);
+
+         vgt_stages |= S_028B54_ES_EN(HAS_TESS ? V_028B54_ES_STAGE_DS : V_028B54_ES_STAGE_REAL) |
+                       S_028B54_GS_EN(1) |
+                       S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER) |
+                       S_028B54_VS_W32_EN(GFX_VERSION >= GFX10 &&
+                                          sctx->shader.gs.current->gs_copy_shader->wave_size == 32);
+      } else if (HAS_TESS) {
+         vgt_stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
        }
+
+      vgt_stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(GFX_VERSION >= GFX9 ? 2 : 0) |
+                    S_028B54_VS_W32_EN(!HAS_GS && GFX_VERSION >= GFX10 &&
+                                       si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->wave_size == 32);
     }
  
-   struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index];
-   if (unlikely(!*pm4))
-      *pm4 = si_build_vgt_shader_config(sctx->screen, key);
-   si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
+   if (vgt_stages != sctx->vgt_shader_stages_en) {
+      sctx->vgt_shader_stages_en = vgt_stages;
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.vgt_pipeline_state);
+   }
  
     struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
  
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp

index 6e1904c..2f28320 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -1510,10 +1510,17 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
                                     S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1);
     }
  
-   shader->ngg.vgt_stages.u.ngg = 1;
-   shader->ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader);
-   shader->ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
-   shader->ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
+   shader->ngg.vgt_shader_stages_en =
+      S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ?
+                        V_028B54_ES_STAGE_DS : V_028B54_ES_STAGE_REAL) |
+      S_028B54_GS_EN(gs_stage == MESA_SHADER_GEOMETRY) |
+      S_028B54_PRIMGEN_EN(1) |
+      S_028B54_PRIMGEN_PASSTHRU_EN(gfx10_is_ngg_passthrough(shader)) |
+      S_028B54_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader) &&
+                                       sscreen->info.family >= CHIP_NAVI23) |
+      S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) |
+      S_028B54_GS_W32_EN(shader->wave_size == 32) |
+      S_028B54_MAX_PRIMGRP_IN_WAVE(2);
  }
  
  static void si_emit_shader_vs(struct si_context *sctx)
@@ -4154,49 +4161,14 @@ void si_init_tess_factor_ring(struct si_context *sctx)
     si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
  }
  
-struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, union si_vgt_stages_key key)
+static void si_emit_vgt_pipeline_state(struct si_context *sctx)
  {
-   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-   uint32_t stages = 0;
-
-   if (key.u.tess) {
-      stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
-
-      if (key.u.gs)
-         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1);
-      else if (key.u.ngg)
-         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
-      else
-         stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
-   } else if (key.u.gs) {
-      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
-   } else if (key.u.ngg) {
-      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
-   }
-
-   if (key.u.ngg) {
-      stages |= S_028B54_PRIMGEN_EN(1) |
-                S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
-                S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough) |
-                S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key.u.ngg_passthrough &&
-                                                 screen->info.family >= CHIP_NAVI23);
-   } else if (key.u.gs) {
-      stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
-   }
-
-   if (screen->info.gfx_level >= GFX9)
-      stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
-
-   if (screen->info.gfx_level >= GFX10) {
-      stages |= S_028B54_HS_W32_EN(key.u.hs_wave32) |
-                S_028B54_GS_W32_EN(key.u.gs_wave32) |
-                S_028B54_VS_W32_EN(screen->info.gfx_level < GFX11 && key.u.vs_wave32);
-      /* Legacy GS only supports Wave64. Read it as an implication. */
-      assert(!(key.u.gs && !key.u.ngg) || !key.u.gs_wave32);
-   }
+   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
  
-   si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
-   return pm4;
+   radeon_begin(cs);
+   radeon_opt_set_context_reg(sctx, R_028B54_VGT_SHADER_STAGES_EN, SI_TRACKED_VGT_SHADER_STAGES_EN,
+                              sctx->vgt_shader_stages_en);
+   radeon_end_update_context_roll(sctx);
  }
  
  static void si_emit_scratch_state(struct si_context *sctx)
@@ -4328,6 +4300,7 @@ void si_init_screen_live_shader_cache(struct si_screen *sscreen)
  
  void si_init_shader_functions(struct si_context *sctx)
  {
+   sctx->atoms.s.vgt_pipeline_state.emit = si_emit_vgt_pipeline_state;
     sctx->atoms.s.scratch_state.emit = si_emit_scratch_state;
  
     sctx->b.create_vs_state = si_create_shader;
author	Marek Olšák <marek.olsak@amd.com>
	Fri, 12 May 2023 21:10:30 +0000 (17:10 -0400)
committer	Marge Bot <emma+marge@anholt.net>
	Tue, 6 Jun 2023 18:01:35 +0000 (18:01 +0000)
src/gallium/drivers/radeonsi/si_gfx_cs.c		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.c		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.h		patch | blob | history
src/gallium/drivers/radeonsi/si_shader.h		patch | blob | history
src/gallium/drivers/radeonsi/si_state.h		patch | blob | history
src/gallium/drivers/radeonsi/si_state_draw.cpp		patch | blob | history
src/gallium/drivers/radeonsi/si_state_shaders.cpp		patch | blob | history