radeonsi: optimizing SET_CONTEXT_REG for shaders vgt_vertex_reuse
author Sonny Jiang <sonny.jiang@amd.com>
Wed, 3 Oct 2018 15:53:14 +0000 (11:53 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 5 Oct 2018 23:04:13 +0000 (19:04 -0400)
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 532a636..3ddd786 100644 (file)
@@ -377,6 +377,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK]  = 0xffffffff;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL]  = 0x0000001e; /* From VI */
 
                /* Set all saved registers state to saved. */
                ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
index 49b1ccd..09dd558 100644 (file)
@@ -689,6 +689,7 @@ struct si_shader {
 
        /*For save precompute registers value */
        unsigned vgt_tf_param; /* VGT_TF_PARAM */
+       unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 };
 
 struct si_shader_part {
index 54b03e0..fffc636 100644 (file)
@@ -313,6 +313,7 @@ enum si_tracked_reg {
 
        SI_TRACKED_CB_SHADER_MASK,
        SI_TRACKED_VGT_TF_PARAM,
+       SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 
        SI_NUM_TRACKED_REGS,
 };
index e493f99..2bdac33 100644 (file)
@@ -440,8 +440,8 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
                    PIPE_TESS_SPACING_FRACTIONAL_ODD)
                        vtx_reuse_depth = 14;
 
-               si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
-                              vtx_reuse_depth);
+               assert(pm4->shader);
+               pm4->shader->vgt_vertex_reuse_block_cntl = vtx_reuse_depth;
        }
 }
 
@@ -574,6 +574,10 @@ static void si_emit_shader_es(struct si_context *sctx)
                                           SI_TRACKED_VGT_TF_PARAM,
                                           shader->vgt_tf_param);
 
+       if (shader->vgt_vertex_reuse_block_cntl)
+               radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          shader->vgt_vertex_reuse_block_cntl);
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -813,6 +817,10 @@ static void si_emit_shader_gs(struct si_context *sctx)
                        radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
                                                   SI_TRACKED_VGT_TF_PARAM,
                                                   shader->vgt_tf_param);
+               if (shader->vgt_vertex_reuse_block_cntl)
+                       radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                                  SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                                  shader->vgt_vertex_reuse_block_cntl);
        }
 }
 
@@ -981,6 +989,11 @@ static void si_emit_shader_vs(struct si_context *sctx)
                radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
                                           SI_TRACKED_VGT_TF_PARAM,
                                           shader->vgt_tf_param);
+
+       if (shader->vgt_vertex_reuse_block_cntl)
+               radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          shader->vgt_vertex_reuse_block_cntl);
 }
 
 /**