From 46213744c700092626db2f038910dd78bf8a48aa Mon Sep 17 00:00:00 2001 From: Matt Coster Date: Fri, 18 Nov 2022 16:04:05 +0000 Subject: [PATCH] pvr: Track max_shared_regs for compute jobs Signed-off-by: Matt Coster Reviewed-by: Karmjit Mahil Part-of: --- src/imagination/vulkan/pvr_cmd_buffer.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 832c0b5..9b4e885 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -2959,14 +2959,23 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, struct pvr_cmd_buffer_state *state = &cmd_buffer->state; struct pvr_csb *csb = &sub_cmd->control_stream; const struct pvr_compute_pipeline *pipeline = state->compute_pipeline; - const uint32_t const_shared_reg_count = + const uint32_t const_shared_regs = pipeline->shader_state.const_shared_reg_count; struct pvr_compute_kernel_info info; /* No shared regs, no need to use an allocation kernel. */ - if (!const_shared_reg_count) + if (!const_shared_regs) return; + /* Accumulate the MAX number of shared registers across the kernels in this + * dispatch. This is used by the FW for context switching, so must be large + * enough to contain all the shared registers that might be in use for this + * compute job. Coefficients don't need to be included as the context switch + * will not happen within the execution of a single workgroup, thus nothing + * needs to be preserved. + */ + state->max_shared_regs = MAX2(state->max_shared_regs, const_shared_regs); + info = (struct pvr_compute_kernel_info){ .indirect_buffer_addr = PVR_DEV_ADDR_INVALID, .sd_type = PVRX(CDMCTRL_SD_TYPE_NONE), @@ -2974,7 +2983,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, .usc_target = PVRX(CDMCTRL_USC_TARGET_ALL), .usc_common_shared = true, .usc_common_size = - DIV_ROUND_UP(const_shared_reg_count, + DIV_ROUND_UP(const_shared_regs, PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)), .local_size = { 1, 1, 1 }, @@ -3011,7 +3020,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, /* We don't need to pad the workgroup size. */ info.max_instances = - pvr_compute_flat_slot_size(pdevice, const_shared_reg_count, false, 1U); + pvr_compute_flat_slot_size(pdevice, const_shared_regs, false, 1U); pvr_compute_generate_control_stream(csb, sub_cmd, &info); } @@ -3022,6 +3031,7 @@ void pvr_compute_update_shared_private( struct pvr_private_compute_pipeline *pipeline) { const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; + struct pvr_cmd_buffer_state *state = &cmd_buffer->state; const uint32_t const_shared_regs = pipeline->const_shared_regs_count; struct pvr_csb *csb = &sub_cmd->control_stream; struct pvr_compute_kernel_info info; @@ -3030,6 +3040,9 @@ void pvr_compute_update_shared_private( if (!const_shared_regs) return; + /* See comment in pvr_compute_update_shared() for details on this. */ + state->max_shared_regs = MAX2(state->max_shared_regs, const_shared_regs); + info = (struct pvr_compute_kernel_info){ .indirect_buffer_addr = PVR_DEV_ADDR_INVALID, .usc_common_size = -- 2.7.4