radeonsi: don't set DrawID and StartInstance if they are unused
author Marek Olšák <marek.olsak@amd.com>
Wed, 25 Nov 2020 08:28:10 +0000 (03:28 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 1 Dec 2020 20:33:03 +0000 (15:33 -0500)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7721>

src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_nir.c
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 34589112c95ffaa9f173ddf11dfd63f0d62e5967..bdbe81878b7678f6cbe8f86060114379b26cf7c6 100644 (file)
@@ -45,6 +45,8 @@
  * one which will mean "unknown" for the purpose of state tracking and
  * the number shouldn't be a commonly-used one. */
 #define SI_BASE_VERTEX_UNKNOWN    INT_MIN
+#define SI_START_INSTANCE_UNKNOWN INT_MIN
+#define SI_DRAW_ID_UNKNOWN        INT_MIN
 #define SI_RESTART_INDEX_UNKNOWN  INT_MIN
 #define SI_INSTANCE_COUNT_UNKNOWN INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES  8
@@ -1045,6 +1047,8 @@ struct si_context {
    bool do_update_shaders;
    bool compute_shaderbuf_sgprs_dirty;
    bool compute_image_sgprs_dirty;
+   bool vs_uses_base_instance;
+   bool vs_uses_draw_id;
 
    /* shader descriptors */
    struct si_descriptors descriptors[SI_NUM_DESCS];
@@ -1634,6 +1638,8 @@ static inline void si_context_add_resource_size(struct si_context *sctx, struct
 static inline void si_invalidate_draw_sh_constants(struct si_context *sctx)
 {
    sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
+   sctx->last_start_instance = SI_START_INSTANCE_UNKNOWN;
+   sctx->last_drawid = SI_DRAW_ID_UNKNOWN;
 }
 
 static inline void si_invalidate_draw_constants(struct si_context *sctx)
index 252a7a5867923b198530129b08069eec55ce4c7d..f40fb02ce54a837060767afda42b8a9e53fa7230 100644 (file)
@@ -2630,6 +2630,22 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
                                    sel->info.stage == MESA_SHADER_VERTEX &&
                                    sel->so.num_outputs;
 
+   if (sel->info.stage == MESA_SHADER_VERTEX) {
+      shader->uses_base_instance = sel->info.uses_base_instance ||
+                                   shader->key.part.vs.prolog.instance_divisor_is_one ||
+                                   shader->key.part.vs.prolog.instance_divisor_is_fetched;
+   } else if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
+      shader->uses_base_instance = shader->previous_stage_sel &&
+                                   (shader->previous_stage_sel->info.uses_base_instance ||
+                                    shader->key.part.tcs.ls_prolog.instance_divisor_is_one ||
+                                    shader->key.part.tcs.ls_prolog.instance_divisor_is_fetched);
+   } else if (sel->info.stage == MESA_SHADER_GEOMETRY) {
+      shader->uses_base_instance = shader->previous_stage_sel &&
+                                   (shader->previous_stage_sel->info.uses_base_instance ||
+                                    shader->key.part.gs.vs_prolog.instance_divisor_is_one ||
+                                    shader->key.part.gs.vs_prolog.instance_divisor_is_fetched);
+   }
+
    si_fix_resource_usage(sscreen, shader);
    si_shader_dump(sscreen, shader, debug, stderr, true);
 
index 05e596c6027da18ab2687d9d39b2fd5bfb6ebb32..c31c5ae46c1aebee449d1299ebb02f3da9c53207 100644 (file)
@@ -368,6 +368,7 @@ struct si_shader_info {
    bool uses_interp_at_sample;
    bool uses_instanceid;
    bool uses_base_vertex;
+   bool uses_base_instance;
    bool uses_drawid;
    bool uses_primid;
    bool uses_frontface;
@@ -752,6 +753,8 @@ struct si_shader {
    bool uses_vs_state_provoking_vertex;
    bool uses_vs_state_outprim;
 
+   bool uses_base_instance;
+
    struct {
       uint16_t ngg_emit_size; /* in dwords */
       uint16_t hw_max_esverts;
index 9ce534f3591e018b2ce4e033a63900247a6aed94..0624822b394af944e46d4df3d6951f90b8dc8cbb 100644 (file)
@@ -346,6 +346,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
    info->uses_frontface = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_FRONT_FACE);
    info->uses_instanceid = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID);
    info->uses_base_vertex = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX);
+   info->uses_base_instance = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE);
    info->uses_invocationid = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_INVOCATION_ID);
    info->uses_grid_size = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_NUM_WORK_GROUPS);
    info->uses_subgroup_info = nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
index f1565f497e818fb386e063eac3447b129a5f8177..1700efa68f0ac81fbab3d9c02ac95742624c1653 100644 (file)
@@ -973,6 +973,9 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
       /* Base vertex and start instance. */
       base_vertex = original_index_size ? info->index_bias : draws[0].start;
 
+      bool set_draw_id = sctx->vs_uses_draw_id;
+      bool set_base_instance = sctx->vs_uses_base_instance;
+
       if (sctx->num_vs_blit_sgprs) {
          /* Re-emit draw constants after we leave u_blitter. */
          si_invalidate_draw_sh_constants(sctx);
@@ -982,19 +985,38 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
          radeon_emit_array(cs, sctx->vs_blit_sh_data, sctx->num_vs_blit_sgprs);
       } else if (base_vertex != sctx->last_base_vertex ||
                  sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
-                 info->start_instance != sctx->last_start_instance ||
-                 info->drawid != sctx->last_drawid || sh_base_reg != sctx->last_sh_base_reg) {
-         radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3);
-         radeon_emit(cs, base_vertex);
-         radeon_emit(cs, info->drawid);
-         radeon_emit(cs, info->start_instance);
+                 (set_base_instance &&
+                  (info->start_instance != sctx->last_start_instance ||
+                   sctx->last_start_instance == SI_START_INSTANCE_UNKNOWN)) ||
+                 (set_draw_id &&
+                  (info->drawid != sctx->last_drawid ||
+                   sctx->last_drawid == SI_DRAW_ID_UNKNOWN)) ||
+                 sh_base_reg != sctx->last_sh_base_reg) {
+         if (set_base_instance) {
+            radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3);
+            radeon_emit(cs, base_vertex);
+            radeon_emit(cs, info->drawid);
+            radeon_emit(cs, info->start_instance);
+
+            sctx->last_start_instance = info->start_instance;
+            sctx->last_drawid = info->drawid;
+         } else if (set_draw_id) {
+            radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
+            radeon_emit(cs, base_vertex);
+            radeon_emit(cs, info->drawid);
+
+            sctx->last_drawid = info->drawid;
+         } else {
+            radeon_set_sh_reg(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, base_vertex);
+         }
 
          sctx->last_base_vertex = base_vertex;
-         sctx->last_start_instance = info->start_instance;
-         sctx->last_drawid = info->drawid;
          sctx->last_sh_base_reg = sh_base_reg;
       }
 
+      /* Don't update draw_id in the following code if it doesn't increment. */
+      set_draw_id &= info->increment_draw_id;
+
       if (index_size) {
          if (dispatch_prim_discard_cs) {
             for (unsigned i = 0; i < num_draws; i++) {
@@ -1010,7 +1032,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
          for (unsigned i = 0; i < num_draws; i++) {
             uint64_t va = index_va + draws[i].start * index_size;
 
-            if (i > 0 && info->increment_draw_id) {
+            if (i > 0 && set_draw_id) {
                unsigned draw_id = info->drawid + i;
 
                radeon_set_sh_reg(cs, sh_base_reg + SI_SGPR_DRAWID * 4, draw_id);
@@ -1028,7 +1050,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
                          * NOT_EOP doesn't work on gfx9 and older.
                          */
                         S_0287F0_NOT_EOP(sctx->chip_class >= GFX10 &&
-                                         !info->increment_draw_id &&
+                                         !set_draw_id &&
                                          i < num_draws - 1 &&
                                          !(sctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)));
          }
@@ -1049,7 +1071,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
                radeon_emit(cs, index_va >> 32);
 
                if (i > 0) {
-                  if (info->increment_draw_id) {
+                  if (set_draw_id) {
                      unsigned draw_id = info->drawid + i;
 
                      radeon_set_sh_reg(cs, sh_base_reg + SI_SGPR_DRAWID * 4, draw_id);
@@ -1067,7 +1089,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
 
          for (unsigned i = 0; i < num_draws; i++) {
             if (i > 0) {
-               if (info->increment_draw_id) {
+               if (set_draw_id) {
                   unsigned draw_id = info->drawid + i;
 
                   radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
index 651dd8c4c298e6496afe468c0b9dd7dd625a9b3a..8d8fee64bdf40c2b2220e88516f29e13fe30a62b 100644 (file)
@@ -3005,6 +3005,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
    sctx->vs_shader.cso = sel;
    sctx->vs_shader.current = sel ? sel->first_variant : NULL;
    sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
+   sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
 
    if (si_update_ngg(sctx))
       si_shader_change_notify(sctx);
@@ -4038,6 +4039,11 @@ bool si_update_shaders(struct si_context *sctx)
       key.u.ngg_gs_fast_launch = !!(vs->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
    }
 
+   sctx->vs_uses_base_instance =
+      sctx->vs_shader.current ? sctx->vs_shader.current->uses_base_instance :
+      sctx->queued.named.hs ? sctx->queued.named.hs->shader->uses_base_instance :
+      sctx->gs_shader.current->uses_base_instance;
+
    si_update_vgt_shader_config(sctx, key);
 
    if (old_kill_clip_distances != si_get_vs_state(sctx)->key.opt.kill_clip_distances)