return CLAMP(workgroup_size, 1, 256);
}
-void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
- unsigned value_shift, const struct radeon_info *info,
- void set_sh_reg(void*, unsigned, uint32_t))
+uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
+ const struct radeon_info *info)
{
/* Register field position and mask. */
uint32_t cu_en_mask = ~clear_mask;
/* AND the field by spi_cu_en. */
uint32_t spi_cu_en = info->spi_cu_en >> value_shift;
- uint32_t new_value = (value & ~cu_en_mask) |
- (((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
-
- set_sh_reg(cs, reg_offset, new_value);
+ return (value & ~cu_en_mask) |
+ (((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
}
/* Return the register value and tune bytes_per_wave to increase scratch performance. */
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
unsigned max_vtx_out, unsigned prim_amp_factor);
-void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
- unsigned value_shift, const struct radeon_info *info,
- void set_sh_reg(void*, unsigned, uint32_t));
+uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
+ const struct radeon_info *info);
void ac_get_scratch_tmpring_size(const struct radeon_info *info,
unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
}
static inline void
-gfx10_set_sh_reg_idx3(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
- assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
-
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG_INDEX, 1, 0));
- radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (3 << 28));
- radeon_emit(cs, value);
-}
-
-static inline void
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
&late_alloc_wave64, &cu_mask);
if (pdevice->rad_info.gfx_level >= GFX7) {
- if (pdevice->rad_info.gfx_level >= GFX10) {
- ac_set_reg_cu_en(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
- S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
- C_00B118_CU_EN, 0, &pdevice->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
- } else {
- radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
- S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F));
- }
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
+ ac_apply_cu_en(S_00B118_CU_EN(cu_mask) |
+ S_00B118_WAVE_LIMIT(0x3F),
+ C_00B118_CU_EN, 0, &pdevice->rad_info));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
}
if (pdevice->rad_info.gfx_level >= GFX10) {
ac_compute_late_alloc(&pdevice->rad_info, true, shader->info.has_ngg_culling,
shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
+ ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) |
+ S_00B21C_WAVE_LIMIT(0x3F),
+ C_00B21C_CU_EN, 0, &pdevice->rad_info));
+
if (pdevice->rad_info.gfx_level >= GFX11) {
- /* TODO: figure out how S_00B204_CU_EN_GFX11 interacts with ac_set_reg_cu_en */
- gfx10_set_sh_reg_idx3(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
- gfx10_set_sh_reg_idx3(
- cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
- } else if (pdevice->rad_info.gfx_level >= GFX10) {
- ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F),
- C_00B21C_CU_EN, 0, &pdevice->rad_info, (void*)gfx10_set_sh_reg_idx3);
- ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
- C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
+ ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
+ C_00B204_CU_EN_GFX11, 16, &pdevice->rad_info));
} else {
- radeon_set_sh_reg_idx(
- pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
- S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg_idx(
- pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
- S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
+ ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
+ C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
}
uint32_t oversub_pc_lines = late_alloc_wave64 ? pdevice->rad_info.pc_lines / 4 : 0;
radeon_emit(cs, gs->config.rsrc2);
}
- if (pdevice->rad_info.gfx_level >= GFX10) {
- ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F),
- C_00B21C_CU_EN, 0, &pdevice->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
- ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
- C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
- } else if (pdevice->rad_info.gfx_level >= GFX7) {
- radeon_set_sh_reg_idx(
- pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
- S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
+ ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |
+ S_00B21C_WAVE_LIMIT(0x3F),
+ C_00B21C_CU_EN, 0, &pdevice->rad_info));
- if (pdevice->rad_info.gfx_level >= GFX10) {
- radeon_set_sh_reg_idx(
- pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
- S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
- }
+ if (pdevice->rad_info.gfx_level >= GFX10) {
+ radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
+ ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
+ C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
}
radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, pipeline->base.gs_copy_shader);
if (physical_device->rad_info.gfx_level >= GFX10 &&
physical_device->rad_info.gfx_level < GFX11) {
/* Logical CUs 16 - 31 */
- ac_set_reg_cu_en(cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
- C_00B104_CU_EN, 16, &physical_device->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3,
+ ac_apply_cu_en(S_00B104_CU_EN(0xffff),
+ C_00B104_CU_EN, 16, &physical_device->rad_info));
}
if (physical_device->rad_info.gfx_level >= GFX10) {
- ac_set_reg_cu_en(cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
- C_00B404_CU_EN, 16, &physical_device->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
- ac_set_reg_cu_en(cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
- C_00B004_CU_EN, 16, &physical_device->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3,
+ ac_apply_cu_en(S_00B404_CU_EN(0xffff),
+ C_00B404_CU_EN, 16, &physical_device->rad_info));
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
+ ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16),
+ C_00B004_CU_EN, 16, &physical_device->rad_info));
}
- if (physical_device->rad_info.gfx_level >= GFX10) {
- ac_set_reg_cu_en(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
- C_00B41C_CU_EN, 0, &physical_device->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
- } else if (physical_device->rad_info.gfx_level == GFX9) {
+ if (physical_device->rad_info.gfx_level >= GFX9) {
radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3,
- S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
+ ac_apply_cu_en(S_00B41C_CU_EN(0xffff) |
+ S_00B41C_WAVE_LIMIT(0x3F),
+ C_00B41C_CU_EN, 0, &physical_device->rad_info));
} else {
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ ac_apply_cu_en(S_00B51C_CU_EN(0xffff) |
+ S_00B51C_WAVE_LIMIT(0x3F),
+ C_00B51C_CU_EN, 0, &physical_device->rad_info));
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
+ ac_apply_cu_en(S_00B31C_CU_EN(0xffff) |
+ S_00B31C_WAVE_LIMIT(0x3F),
+ C_00B31C_CU_EN, 0, &physical_device->rad_info));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
- if (physical_device->rad_info.gfx_level >= GFX10) {
- ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
- S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) |
- S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
- C_00B01C_CU_EN, 0, &physical_device->rad_info,
- (void*)gfx10_set_sh_reg_idx3);
- } else {
- radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
- S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
- }
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
+ ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
+ S_00B01C_WAVE_LIMIT(0x3F) |
+ S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
+ C_00B01C_CU_EN, 0, &physical_device->rad_info));
}
if (physical_device->rad_info.gfx_level >= GFX10) {
radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
} while (0)
-#define radeon_set_sh_reg_idx3_seq(reg, num) do { \
+#define radeon_set_sh_reg_idx3_seq(sctx, reg, num) do { \
SI_CHECK_SHADOWED_REGS(reg, num); \
assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
radeon_emit(PKT3(PKT3_SET_SH_REG_INDEX, num, 0)); \
- radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | (3 << 28)); \
+ radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | ((sctx)->gfx_level >= GFX10 ? 3 << 28 : 0)); \
} while (0)
#define radeon_set_sh_reg(reg, value) do { \
radeon_emit(value); \
} while (0)
-#define radeon_set_sh_reg_idx3(reg, value) do { \
- radeon_set_sh_reg_idx3_seq(reg, 1); \
+#define radeon_set_sh_reg_idx3(sctx, reg, value) do { \
+ radeon_set_sh_reg_idx3_seq(sctx, reg, 1); \
radeon_emit(value); \
} while (0)
unsigned __value = val; \
if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
sctx->tracked_regs.reg_value[reg] != __value) { \
- if (sctx->gfx_level >= GFX10) \
- radeon_set_sh_reg_idx3(offset, __value); \
- else \
- radeon_set_sh_reg(offset, __value); \
+ radeon_set_sh_reg_idx3(sctx, offset, __value); \
sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
sctx->tracked_regs.reg_value[reg] = __value; \
} \
radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
} while (0)
-/* Wrappers that are only used when they are passed as function pointers. */
-static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
- uint32_t value)
-{
- radeon_begin(cs);
- radeon_set_sh_reg(reg_offset, value);
- radeon_end();
-}
-
-static inline void radeon_set_sh_reg_idx3_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
- uint32_t value)
-{
- radeon_begin(cs);
- radeon_set_sh_reg_idx3(reg_offset, value);
- radeon_end();
-}
-
/* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
si_pm4_set_reg_custom(state, reg, val, opcode, 0);
}
-void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val)
+void si_pm4_set_reg_idx3(struct si_screen *sscreen, struct si_pm4_state *state,
+ unsigned reg, uint32_t val)
{
SI_CHECK_SHADOWED_REGS(reg, 1);
- si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3);
+ si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX,
+ sscreen->info.gfx_level >= GFX10 ? 3 : 0);
}
void si_pm4_clear_state(struct si_pm4_state *state)
extern "C" {
#endif
-// forward defines
+/* forward definitions */
+struct si_screen;
struct si_context;
/* State atoms are callbacks which write a sequence of packets into a GPU
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
-void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
+void si_pm4_set_reg_idx3(struct si_screen *sscreen, struct si_pm4_state *state,
+ unsigned reg, uint32_t val);
void si_pm4_clear_state(struct si_pm4_state *state);
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
cu_mask_ps = gfx103_get_cu_mask_ps(sscreen);
if (sctx->gfx_level >= GFX7) {
- ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
- S_00B01C_CU_EN(cu_mask_ps) |
- S_00B01C_WAVE_LIMIT(0x3F) |
- S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11),
- C_00B01C_CU_EN, 0, &sscreen->info,
- (void*)(sctx->gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+ ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
+ S_00B01C_WAVE_LIMIT(0x3F) |
+ S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11),
+ C_00B01C_CU_EN, 0, &sscreen->info));
}
if (sctx->gfx_level <= GFX8) {
}
if (sctx->gfx_level >= GFX7 && sctx->gfx_level <= GFX8) {
- ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
- C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
+ C_00B51C_CU_EN, 0, &sscreen->info));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
- ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
- C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
+ si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
+ C_00B31C_CU_EN, 0, &sscreen->info));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
}
if (sctx->gfx_level >= GFX9) {
- ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
- 0, &sscreen->info,
- (void*)(sctx->gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
+ C_00B41C_CU_EN, 0, &sscreen->info));
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |
if (sctx->gfx_level >= GFX10 && sctx->gfx_level <= GFX10_3) {
/* Logical CUs 16 - 31 */
- ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
- C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3);
- ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
- C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3);
- ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
- C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3);
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
+ ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16),
+ C_00B004_CU_EN, 16, &sscreen->info));
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
+ ac_apply_cu_en(S_00B104_CU_EN(0xffff),
+ C_00B104_CU_EN, 16, &sscreen->info));
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
+ ac_apply_cu_en(S_00B404_CU_EN(0xffff),
+ C_00B404_CU_EN, 16, &sscreen->info));
si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
if (sscreen->info.gfx_level >= GFX9) {
if (sscreen->info.gfx_level >= GFX11) {
- ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
- S_00B404_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
- S_00B404_CU_EN(0xffff),
- C_00B404_CU_EN, 16, &sscreen->info,
- (void (*)(void*, unsigned, uint32_t))si_pm4_set_reg_idx3);
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
+ ac_apply_cu_en(S_00B404_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
+ S_00B404_CU_EN(0xffff),
+ C_00B404_CU_EN, 16, &sscreen->info));
}
if (sscreen->info.gfx_level >= GFX10) {
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
radeon_end_update_context_roll(sctx);
/* These don't cause any context rolls. */
- if (sctx->screen->info.spi_cu_en_has_effect) {
- if (sctx->gfx_level >= GFX7) {
- ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- shader->gs.spi_shader_pgm_rsrc3_gs,
- C_00B21C_CU_EN, 0, &sctx->screen->info,
- (void (*)(void*, unsigned, uint32_t))
- (sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
- sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
- }
- if (sctx->gfx_level >= GFX10) {
- ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- shader->gs.spi_shader_pgm_rsrc4_gs,
- C_00B204_CU_EN_GFX10, 16, &sctx->screen->info,
- (void (*)(void*, unsigned, uint32_t))
- (sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
- sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
- }
- } else {
- radeon_begin_again(&sctx->gfx_cs);
- if (sctx->gfx_level >= GFX7) {
- radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
- shader->gs.spi_shader_pgm_rsrc3_gs);
- }
- if (sctx->gfx_level >= GFX10) {
- radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
- shader->gs.spi_shader_pgm_rsrc4_gs);
- }
- radeon_end();
+ radeon_begin_again(&sctx->gfx_cs);
+ if (sctx->gfx_level >= GFX7) {
+ radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+ shader->gs.spi_shader_pgm_rsrc3_gs);
}
+ if (sctx->gfx_level >= GFX10) {
+ radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+ shader->gs.spi_shader_pgm_rsrc4_gs);
+ }
+ radeon_end();
}
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1);
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
- shader->gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
- S_00B21C_WAVE_LIMIT(0x3F);
- shader->gs.spi_shader_pgm_rsrc4_gs = S_00B204_CU_EN_GFX10(0xffff) |
- S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0);
+ shader->gs.spi_shader_pgm_rsrc3_gs =
+ ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |
+ S_00B21C_WAVE_LIMIT(0x3F),
+ C_00B21C_CU_EN, 0, &sscreen->info);
+ shader->gs.spi_shader_pgm_rsrc4_gs =
+ ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
+ C_00B204_CU_EN_GFX10, 16, &sscreen->info);
shader->gs.vgt_gs_onchip_cntl =
S_028A44_ES_VERTS_PER_SUBGRP(shader->gs_info.es_verts_per_subgroup) |
polaris_set_vgt_vertex_reuse(sscreen, shader->key.ge.part.gs.es, shader);
} else {
- shader->gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
- S_00B21C_WAVE_LIMIT(0x3F);
+ shader->gs.spi_shader_pgm_rsrc3_gs =
+ ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |
+ S_00B21C_WAVE_LIMIT(0x3F),
+ C_00B21C_CU_EN, 0, &sscreen->info);
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
pm4->reg_va_low_idx = pm4->ndw - 1;
radeon_begin_again(&sctx->gfx_cs);
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
shader->ngg.ge_pc_alloc);
- if (sctx->screen->info.spi_cu_en_has_effect) {
- radeon_end();
- ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- shader->ngg.spi_shader_pgm_rsrc3_gs,
- C_00B21C_CU_EN, 0, &sctx->screen->info,
- (void (*)(void*, unsigned, uint32_t))
- (sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
- ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- shader->ngg.spi_shader_pgm_rsrc4_gs,
- sctx->gfx_level >= GFX11 ? C_00B204_CU_EN_GFX11 : C_00B204_CU_EN_GFX10, 16,
- &sctx->screen->info,
- (void (*)(void*, unsigned, uint32_t))
- (sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
- sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
- ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
- } else {
- radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
- shader->ngg.spi_shader_pgm_rsrc3_gs);
- radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
- shader->ngg.spi_shader_pgm_rsrc4_gs);
- radeon_end();
- }
+ radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+ shader->ngg.spi_shader_pgm_rsrc3_gs);
+ radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+ SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+ shader->ngg.spi_shader_pgm_rsrc4_gs);
+ radeon_end();
}
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL) |
S_00B22C_LDS_SIZE(shader->config.lds_size));
- shader->ngg.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(cu_mask) |
- S_00B21C_WAVE_LIMIT(0x3F);
+ shader->ngg.spi_shader_pgm_rsrc3_gs =
+ ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) |
+ S_00B21C_WAVE_LIMIT(0x3F),
+ C_00B21C_CU_EN, 0, &sscreen->info);
if (sscreen->info.gfx_level >= GFX11) {
shader->ngg.spi_shader_pgm_rsrc4_gs =
- S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64) |
- S_00B204_INST_PREF_SIZE(si_get_shader_prefetch_size(shader));
+ ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64) |
+ S_00B204_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)),
+ C_00B204_CU_EN_GFX11, 16, &sscreen->info);
} else {
shader->ngg.spi_shader_pgm_rsrc4_gs =
- S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64);
+ ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
+ S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
+ C_00B204_CU_EN_GFX10, 16, &sscreen->info);
}
nparams = MAX2(shader->info.nr_param_exports, 1);
oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
if (sscreen->info.gfx_level >= GFX7) {
- ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
- S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
- C_00B118_CU_EN, 0, &sscreen->info,
- (void (*)(void*, unsigned, uint32_t))
- (sscreen->info.gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ ac_apply_cu_en(S_00B118_CU_EN(cu_mask) |
+ S_00B118_WAVE_LIMIT(0x3F),
+ C_00B118_CU_EN, 0, &sscreen->info));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
}
if (sscreen->info.gfx_level >= GFX11) {
unsigned cu_mask_ps = gfx103_get_cu_mask_ps(sscreen);
- ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
- S_00B004_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
- S_00B004_CU_EN(cu_mask_ps >> 16),
- C_00B004_CU_EN, 16, &sscreen->info,
- (void (*)(void*, unsigned, uint32_t))si_pm4_set_reg_idx3);
+ si_pm4_set_reg_idx3(sscreen, pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
+ ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16) |
+ S_00B004_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)),
+ C_00B004_CU_EN, 16, &sscreen->info));
}
}