From 3ea5beca1faf4b871774cf3fb41d0039a59a3d2f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 8 Dec 2021 03:10:24 -0500 Subject: [PATCH] radv: apply spi_cu_en to CU_EN Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/vulkan/radv_cs.h | 11 +++++++++++ src/amd/vulkan/radv_pipeline.c | 45 +++++++++++++++++++++++++++++++++--------- src/amd/vulkan/si_cmd_buffer.c | 33 ++++++++++++++++++++++--------- 3 files changed, 71 insertions(+), 18 deletions(-) diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 277c77b..3a027eb 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -129,6 +129,17 @@ radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_ } static inline void +gfx10_set_sh_reg_idx3(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) +{ + assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); + assert(cs->cdw + 3 <= cs->max_dw); + + radeon_emit(cs, PKT3(PKT3_SET_SH_REG_INDEX, 1, 0)); + radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (3 << 28)); + radeon_emit(cs, value); +} + +static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index da42fa9..b05a7ff 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -4765,8 +4765,15 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { - radeon_set_sh_reg_idx(pipeline->device->physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, - S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F)); + if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { + ac_set_reg_cu_en(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, + S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), + C_00B118_CU_EN, 0, &pipeline->device->physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + } else { + radeon_set_sh_reg_idx(pipeline->device->physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, + S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F)); + } radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); } if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { @@ -4938,12 +4945,23 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf ac_compute_late_alloc(&pipeline->device->physical_device->rad_info, true, shader->info.has_ngg_culling, shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); - radeon_set_sh_reg_idx( - pipeline->device->physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, - S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F)); - radeon_set_sh_reg_idx( - pipeline->device->physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, - S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64)); + if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { + ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F), + C_00B21C_CU_EN, 0, &pipeline->device->physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, + S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64), + C_00B204_CU_EN, 16, &pipeline->device->physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + } else { + radeon_set_sh_reg_idx( + pipeline->device->physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, + S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F)); + radeon_set_sh_reg_idx( + pipeline->device->physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, + S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64)); + } uint32_t oversub_pc_lines = late_alloc_wave64 ? pipeline->device->physical_device->rad_info.pc_lines / 4 : 0; if (shader->info.has_ngg_culling) { @@ -5185,7 +5203,16 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf radeon_emit(cs, gs->config.rsrc2); } - if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { + if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { + ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), + C_00B21C_CU_EN, 0, &pipeline->device->physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, + S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), + C_00B204_CU_EN, 16, &pipeline->device->physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + } else if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_sh_reg_idx( pipeline->device->physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 993ea3d..312e7c1 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -333,15 +333,23 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (physical_device->rad_info.chip_class >= GFX7) { if (physical_device->rad_info.chip_class >= GFX10) { /* Logical CUs 16 - 31 */ - radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3, - S_00B404_CU_EN(0xffff)); - radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3, - S_00B104_CU_EN(0xffff)); - radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3, - S_00B004_CU_EN(cu_mask_ps >> 16)); + ac_set_reg_cu_en(cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff), + C_00B404_CU_EN, 16, &physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + ac_set_reg_cu_en(cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff), + C_00B104_CU_EN, 16, &physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + ac_set_reg_cu_en(cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16), + C_00B004_CU_EN, 16, &physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); } - if (physical_device->rad_info.chip_class >= GFX9) { + if (physical_device->rad_info.chip_class >= GFX10) { + ac_set_reg_cu_en(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, + S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), + C_00B41C_CU_EN, 0, &physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + } else if (physical_device->rad_info.chip_class == GFX9) { radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); } else { @@ -358,8 +366,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); } - radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3, - S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); + if (physical_device->rad_info.chip_class >= GFX10) { + ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, + S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F), + C_00B01C_CU_EN, 0, &physical_device->rad_info, + (void*)gfx10_set_sh_reg_idx3); + } else { + radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3, + S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); + } } if (physical_device->rad_info.chip_class >= GFX10) { -- 2.7.4