This patch removes PKT3_CONTEXT_REG_RMW from radeonsi
and avoids creating multiple command buffers (PM4 packets) for R_02881C_PA_CL_VS_OUT_CNTL.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12789>
radeon_emit(value); \
} while (0)
-#define radeon_set_context_reg_rmw(reg, value, mask) do { \
- SI_CHECK_SHADOWED_REGS(reg, 1); \
- assert((reg) >= SI_CONTEXT_REG_OFFSET); \
- radeon_emit(PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); \
- radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
- radeon_emit(mask); \
- radeon_emit(value); \
-} while (0)
-
-/* Emit PKT3_CONTEXT_REG_RMW if the register value is different. */
-#define radeon_opt_set_context_reg_rmw(sctx, offset, reg, val, mask) do { \
- unsigned __value = (val); \
- assert((__value & ~mask) == 0); \
- __value &= mask; \
- if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
- sctx->tracked_regs.reg_value[reg] != __value) { \
- radeon_set_context_reg_rmw(offset, __value, mask); \
- sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
- sctx->tracked_regs.reg_value[reg] = __value; \
- } \
-} while (0)
-
/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
unsigned __value = val; \
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__VS] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000;
ubyte cs_num_images_in_user_sgprs;
ubyte num_vs_inputs;
ubyte num_vbos_in_user_sgprs;
- unsigned pa_cl_vs_out_cntl;
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
ubyte clipdist_mask;
ubyte culldist_mask;
clipdist_mask | (culldist_mask << 8);
radeon_begin(&sctx->gfx_cs);
-
- if (sctx->chip_class >= GFX10) {
- radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, pa_cl_cntl,
- ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
- } else {
- radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,
- vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl);
- }
+ radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
+ pa_cl_cntl | vs->pa_cl_vs_out_cntl);
radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));
radeon_end_update_context_roll(sctx);
uint32_t sh_base[SI_NUM_SHADERS];
};
-#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \
- (S_02881C_USE_VTX_POINT_SIZE(1) | S_02881C_USE_VTX_EDGE_FLAG(1) | \
- S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | S_02881C_USE_VTX_VIEWPORT_INDX(1) | \
- S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | \
- S_02881C_USE_VTX_VRS_RATE(1))
-
/* The list of registers whose emitted values are remembered by si_context. */
enum si_tracked_reg
{
SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, /* set with SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK*/
- SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, /* set with ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */
+ SI_TRACKED_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
struct pipe_context *ctx = (struct pipe_context *)sctx;
struct si_shader *old_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
unsigned old_kill_clip_distances = old_vs ? old_vs->key.opt.kill_clip_distances : 0;
+ unsigned old_pa_cl_vs_out_cntl = old_vs ? old_vs->pa_cl_vs_out_cntl : 0;
struct si_shader *old_ps = sctx->shader.ps.current;
unsigned old_spi_shader_col_format =
old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
*pm4 = si_build_vgt_shader_config(sctx->screen, key);
si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
- if (old_kill_clip_distances != si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.kill_clip_distances)
+ if (old_kill_clip_distances !=
+ si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.kill_clip_distances ||
+ old_pa_cl_vs_out_cntl !=
+ si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->pa_cl_vs_out_cntl)
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
r = si_shader_select(ctx, &sctx->shader.ps);
shader->ctx_reg.gs.vgt_gs_instance_cnt =
S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0);
+ /* Copy over fields from the GS copy shader to make them easily accessible from GS. */
+ shader->pa_cl_vs_out_cntl = sel->gs_copy_shader->pa_cl_vs_out_cntl;
+
va = shader->bo->gpu_address;
if (sscreen->info.chip_class >= GFX9) {
radeon_opt_set_context_reg(sctx, R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
shader->ctx_reg.ngg.pa_cl_ngg_cntl);
- radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
radeon_end_update_context_roll(sctx);
/* These don't cause a context roll. */
S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
}
- if (sctx->chip_class >= GFX10) {
- radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
- }
radeon_end_update_context_roll(sctx);
/* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
key->mono.u.vs_export_prim_id = 0;
}
- /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */
- key->opt.kill_pointsize = sctx->chip_class >= GFX10 &&
- vs->info.writes_psize &&
+ key->opt.kill_pointsize = vs->info.writes_psize &&
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
!sctx->queued.named.rasterizer->polygon_mode_is_points;
}
}
}
- /* PA_CL_VS_OUT_CNTL */
- if (sctx->chip_class <= GFX9)
- sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false);
-
sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
sel->culldist_mask = u_bit_consecutive(0, sel->info.base.cull_distance_array_size) <<
(!old_hw_vs ||
(old_hw_vs->info.stage == MESA_SHADER_VERTEX && old_hw_vs->info.base.vs.window_space_position) !=
(next_hw_vs->info.stage == MESA_SHADER_VERTEX && next_hw_vs->info.base.vs.window_space_position) ||
- old_hw_vs->pa_cl_vs_out_cntl != next_hw_vs->pa_cl_vs_out_cntl ||
old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
old_hw_vs->culldist_mask != next_hw_vs->culldist_mask || !old_hw_vs_variant ||
!next_hw_vs_variant ||
- old_hw_vs_variant->key.opt.kill_clip_distances != next_hw_vs_variant->key.opt.kill_clip_distances))
+ old_hw_vs_variant->key.opt.kill_clip_distances != next_hw_vs_variant->key.opt.kill_clip_distances ||
+ old_hw_vs_variant->pa_cl_vs_out_cntl != next_hw_vs_variant->pa_cl_vs_out_cntl))
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
}