From 75b1c4997fe4de75dc4735937959c52f30f5c42b Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 14 Dec 2017 15:51:19 +0100 Subject: [PATCH] radv: calculate best compute resource limits Ported from RadeonSI. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index eae5d40..d6aaff7 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2561,6 +2561,8 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) { struct radv_shader_variant *compute_shader; struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + unsigned compute_resource_limits; + unsigned waves_per_threadgroup; uint64_t va; if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) @@ -2572,7 +2574,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset; MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, 16); + cmd_buffer->cs, 19); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2); radeon_emit(cmd_buffer->cs, va >> 8); @@ -2592,6 +2594,17 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) S_00B860_WAVES(pipeline->max_waves) | S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); + /* Calculate best compute resource limits. */ + waves_per_threadgroup = + DIV_ROUND_UP(compute_shader->info.cs.block_size[0] * + compute_shader->info.cs.block_size[1] * + compute_shader->info.cs.block_size[2], 64); + compute_resource_limits = + S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); + + radeon_set_sh_reg(cmd_buffer->cs, R_00B854_COMPUTE_RESOURCE_LIMITS, + compute_resource_limits); + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); radeon_emit(cmd_buffer->cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0])); -- 2.7.4