radeonsi: set PA_SU_PRIM_FILTER_CNTL optimally
author Marek Olšák <marek.olsak@amd.com>
Sat, 29 Sep 2018 00:16:13 +0000 (20:16 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 16 Oct 2018 19:28:22 +0000 (15:28 -0400)
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h

index 84f5e4c..474ffb5 100644 (file)
@@ -339,6 +339,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
index babd171..132085a 100644 (file)
@@ -3242,6 +3242,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
 static void si_emit_msaa_sample_locs(struct si_context *sctx)
 {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
+       struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        unsigned nr_samples = sctx->framebuffer.nr_samples;
        bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;
 
@@ -3263,7 +3264,6 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
        }
 
        if (sctx->family >= CHIP_POLARIS10) {
-               struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
                unsigned small_prim_filter_cntl =
                        S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
                        /* line bug */
@@ -3283,6 +3283,16 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
                                           SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
                                           small_prim_filter_cntl);
        }
+
+       /* The exclusion bits can be set to improve rasterization efficiency
+        * if no sample lies on the pixel boundary (-8 sample offset).
+        */
+       bool exclusion = sctx->chip_class >= CIK &&
+                        (!rs->multisample_enable || nr_samples != 16);
+       radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL,
+                                  SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
+                                  S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
+                                  S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
 }
 
 static bool si_out_of_order_rasterization(struct si_context *sctx)
@@ -4861,9 +4871,6 @@ static void si_init_config(struct si_context *sctx)
                si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
                               S_008A14_CLIP_VTX_REORDER_ENA(1));
 
-       if (!has_clear_state)
-               si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
        /* CLEAR_STATE doesn't clear these correctly on certain generations.
         * I don't know why. Deduced by trial and error.
         */
index 173e210..c3b56ff 100644 (file)
@@ -262,6 +262,7 @@ enum si_tracked_reg {
        SI_TRACKED_DB_EQAA,
        SI_TRACKED_PA_SC_MODE_CNTL_1,
 
+       SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
        SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
 
        SI_TRACKED_PA_CL_VS_OUT_CNTL,