radeonsi: adjust 16x EQAA sample locs to make PA_SU_PRIM_FILTER_CNTL immutable
author Marek Olšák <marek.olsak@amd.com>
Mon, 15 May 2023 00:28:52 +0000 (20:28 -0400)
committer Marge Bot <emma+marge@anholt.net>
Tue, 6 Jun 2023 18:01:35 +0000 (18:01 +0000)
and move PA_SU_PRIM_FILTER_CNTL to the gfx preamble.

If no sample location uses the -8 coordinate, the XMAX_RIGHT_EXCLUSION and
YMAX_BOTTOM_EXCLUSION fields of PA_SU_PRIM_FILTER_CNTL can always be set to 1.

This is part 2 of simplifying si_emit_msaa_sample_locs.

Acked-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22833>

src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_msaa.c

index 329517b..0539a74 100644 (file)
@@ -265,7 +265,6 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
    ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_LINE_STIPPLE] = 0;
    ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0;
    ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
-   ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
    ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x2;
    ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT] = 0;
    ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0;
index 51e8a19..dcc2730 100644 (file)
@@ -3594,14 +3594,6 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
       radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
                                  SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
    }
-
-   /* The exclusion bits can be set to improve rasterization efficiency
-    * if no sample lies on the pixel boundary (-8 sample offset).
-    */
-   bool exclusion = sctx->gfx_level >= GFX7 && (!rs->multisample_enable || nr_samples != 16);
-   radeon_opt_set_context_reg(
-      sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
-      S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
    radeon_end();
 }
 
@@ -3744,7 +3736,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
          4, /* 2x MSAA */
          6, /* 4x MSAA */
          7, /* 8x MSAA */
-         8, /* 16x MSAA */
+         7, /* 16x MSAA */
       };
       unsigned log_samples = util_logbase2(coverage_samples);
 
@@ -5756,6 +5748,11 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
       si_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0);
    }
 
+   /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
+   si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL,
+                  S_02882C_XMAX_RIGHT_EXCLUSION(sctx->gfx_level >= GFX7) |
+                  S_02882C_YMAX_BOTTOM_EXCLUSION(sctx->gfx_level >= GFX7));
+
    if (sctx->gfx_level <= GFX7 || !has_clear_state) {
       if (sctx->gfx_level < GFX11) {
          si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
index e8dc9f0..5f6321e 100644 (file)
@@ -272,7 +272,6 @@ enum si_tracked_context_reg
    SI_TRACKED_PA_SC_LINE_STIPPLE,
    SI_TRACKED_PA_SC_MODE_CNTL_1,
    SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
-   SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
    SI_TRACKED_SPI_PS_IN_CONTROL,
    SI_TRACKED_VGT_GS_INSTANCE_CNT,
    SI_TRACKED_VGT_GS_MAX_VERT_OUT,
index 2379f48..a324291 100644 (file)
@@ -97,7 +97,11 @@ static const uint32_t sample_locs_16x[] = {
    FILL_SREG(-5, -2, 5, 3, -2, 6, 3, -5),
    FILL_SREG(-4, -6, 1, 1, -6, 4, 7, -4),
    FILL_SREG(-1, -3, 6, 7, -3, 2, 0, -7),
-   FILL_SREG(-7, -8, 2, 5, -8, 0, 4, -1),
+   /* We use -7 where DX sample locations want -8, which allows us to make
+    * the PA_SU_PRIM_FILTER_CNTL register immutable. That's a quality compromise
+    * for underused 16x EQAA.
+    */
+   FILL_SREG(-7, -7 /* DX uses -8 */, 2, 5, -7 /* DX uses -8 */, 0, 4, -1),
 };
 static const uint64_t centroid_priority_16x = 0xc97e64b231d0fa85ull;