This is for experiments with variable rate shading (VRS).
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7646>
OPT_BOOL(clamp_div_by_zero, false, "Clamp div by zero (x / 0 becomes FLT_MAX instead of NaN)")
OPT_BOOL(no_trunc_coord, false, "Always set TRUNC_COORD=0")
OPT_BOOL(shader_culling, false, "Cull primitives in shaders when benefical (without tess and GS)")
+OPT_BOOL(vrs2x2, false, "Enable 2x2 coarse shading for non-GUI elements")
#undef OPT_BOOL
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
#include "si_debug_options.h"
}
+ if (sscreen->info.chip_class < GFX10_3)
+ sscreen->options.vrs2x2 = false;
+
si_disk_cache_create(sscreen);
/* Determine the number of shader compiler threads. */
bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
+ bool writes_vrs = ctx->screen->options.vrs2x2;
/* Write the misc vector (point size, edgeflag, layer, viewport). */
- if (writes_psize || pos_writes_edgeflag ||
+ if (writes_psize || pos_writes_edgeflag || writes_vrs ||
shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
pos_args[1].enabled_channels = writes_psize |
- (pos_writes_edgeflag << 1) |
+ ((pos_writes_edgeflag | writes_vrs) << 1) |
(shader->selector->info.writes_layer << 2);
pos_args[1].valid_mask = 0; /* EXEC mask */
pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
}
+ if (writes_vrs) {
+ /* Bits [2:3] = VRS rate X
+ * Bits [4:5] = VRS rate Y
+ *
+ * Each rate is a 2-bit signed value in the range [-2, 1]. Values:
+ * 1: 2x coarser shading rate in that direction.
+ * 0: normal shading rate
+ * -1: 2x finer shading rate (sample shading, not directional)
+ * -2: 4x finer shading rate (sample shading, not directional)
+ *
+ * Sample shading can't go above 8 samples, so both numbers can't be -2
+ * at the same time.
+ */
+ LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, (1 << 2) | (1 << 4), 0);
+
+ /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
+ rates = LLVMBuildSelect(ctx->ac.builder,
+ LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE,
+ pos_args[0].out[3], ctx->ac.f32_1, ""),
+ rates, ctx->ac.i32_0, "");
+
+ LLVMValueRef v = ac_to_integer(&ctx->ac, pos_args[1].out[1]);
+ v = LLVMBuildOr(ctx->ac.builder, v, rates, "");
+ pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
+ }
+
if (ctx->screen->info.chip_class >= GFX9) {
/* GFX9 has the layer in out.z[10:0] and the viewport
* index in out.z[19:16].
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |
- S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
+ S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&
+ !sctx->screen->options.vrs2x2) |
S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
clipdist_mask | (culldist_mask << 8);
radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
db_shader_control);
+ if (sctx->screen->options.vrs2x2) {
+ /* If the shader is using discard, turn off coarse shading because
+ * discard at 2x2 pixel granularity degrades quality too much.
+ *
+ * MIN allows sample shading but not coarse shading.
+ */
+ unsigned mode = G_02880C_KILL_ENABLE(db_shader_control) ? V_028064_VRS_COMB_MODE_MIN
+ : V_028064_VRS_COMB_MODE_PASSTHRU;
+ radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
+ SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
+ S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
+ S_028064_VRS_OVERRIDE_RATE_X(0) |
+ S_028064_VRS_OVERRIDE_RATE_Y(0));
+ }
+
if (initial_cdw != sctx->gfx_cs->current.cdw)
sctx->context_roll = true;
}
if (sctx->chip_class >= GFX10_3) {
si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
- /* This allows sample shading. */
+ /* The rate combiners have no effect if they are disabled like this:
+ * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1
+ * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1
+ * HTILE_RATE: VRS_HTILE_ENCODING = 0
+ * SAMPLE_ITER: PS_ITER_SAMPLE = 0
+ *
+ * Use OVERRIDE, which will ignore results from previous combiners.
+ * (e.g. enabled sample shading overrides the vertex rate)
+ */
si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(1));
+ S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
}
sctx->cs_preamble_state = pm4;
#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \
(S_02881C_USE_VTX_POINT_SIZE(1) | S_02881C_USE_VTX_EDGE_FLAG(1) | \
S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | S_02881C_USE_VTX_VIEWPORT_INDX(1) | \
- S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1))
+ S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | \
+ S_02881C_USE_VTX_VRS_RATE(1))
/* The list of registers whose emitted values are remembered by si_context. */
enum si_tracked_reg
SI_TRACKED_PA_SC_BINNER_CNTL_0,
SI_TRACKED_DB_DFSM_CONTROL,
+ SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */
SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
shader_variant_flags |= 1 << 8;
if (sel->screen->debug_flags & DBG(GISEL))
shader_variant_flags |= 1 << 9;
+ if ((sel->info.stage == MESA_SHADER_VERTEX ||
+ sel->info.stage == MESA_SHADER_TESS_EVAL ||
+ sel->info.stage == MESA_SHADER_GEOMETRY) &&
+ !es &&
+ sel->screen->options.vrs2x2)
+ shader_variant_flags |= 1 << 10;
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
writes_psize &= !shader->key.opt.kill_pointsize;
bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
+ sel->screen->options.vrs2x2 ||
sel->info.writes_layer || sel->info.writes_viewport_index;
return S_02881C_USE_VTX_POINT_SIZE(writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
+ S_02881C_USE_VTX_VRS_RATE(sel->screen->options.vrs2x2) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |