From 5f27777379f5939871d4a5fdedc64b27a592d82a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 22 Sep 2020 13:13:05 -0400 Subject: [PATCH] radeonsi: add a tweak for PS wave CU utilization for gfx10.3 Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_state.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d9810a3..f13ca4f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5137,6 +5137,18 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) S_028034_BR_X(16384) | S_028034_BR_Y(16384)); } + unsigned cu_mask_ps = 0xffffffff; + + /* It's wasteful to enable all CUs for PS if shader arrays have a different + * number of CUs. The reason is that the hardware sends the same number of PS + * waves to each shader array, so the slowest shader array limits the performance. + * Disable the extra CUs for PS in other shader arrays to save power and thus + * increase clocks for busy CUs. In the future, we might disable or enable this + * tweak only for certain apps. + */ + if (sctx->chip_class >= GFX10_3) + cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa); + if (sctx->chip_class >= GFX7) { /* Compute LATE_ALLOC_VS.LIMIT. */ unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa; @@ -5190,7 +5202,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F)); si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, - S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); + S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); } if (sctx->chip_class <= GFX8) { @@ -5269,7 +5281,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) if (sctx->chip_class >= GFX10) { /* Logical CUs 16 - 31 */ - si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(0xffff)); + si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16)); si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff)); si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff)); -- 2.7.4