pvr: Track max_shared_regs for compute jobs
author Matt Coster <matt.coster@imgtec.com>
Fri, 18 Nov 2022 16:04:05 +0000 (16:04 +0000)
committer Marge Bot <emma+marge@anholt.net>
Thu, 8 Dec 2022 13:10:35 +0000 (13:10 +0000)
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20159>

src/imagination/vulkan/pvr_cmd_buffer.c

index 832c0b5..9b4e885 100644 (file)
@@ -2959,14 +2959,23 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    struct pvr_csb *csb = &sub_cmd->control_stream;
    const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
-   const uint32_t const_shared_reg_count =
+   const uint32_t const_shared_regs =
       pipeline->shader_state.const_shared_reg_count;
    struct pvr_compute_kernel_info info;
 
    /* No shared regs, no need to use an allocation kernel. */
-   if (!const_shared_reg_count)
+   if (!const_shared_regs)
       return;
 
+   /* Accumulate the MAX number of shared registers across the kernels in this
+    * dispatch. This is used by the FW for context switching, so must be large
+    * enough to contain all the shared registers that might be in use for this
+    * compute job. Coefficients don't need to be included as the context switch
+    * will not happen within the execution of a single workgroup, thus nothing
+    * needs to be preserved.
+    */
+   state->max_shared_regs = MAX2(state->max_shared_regs, const_shared_regs);
+
    info = (struct pvr_compute_kernel_info){
       .indirect_buffer_addr = PVR_DEV_ADDR_INVALID,
       .sd_type = PVRX(CDMCTRL_SD_TYPE_NONE),
@@ -2974,7 +2983,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
       .usc_target = PVRX(CDMCTRL_USC_TARGET_ALL),
       .usc_common_shared = true,
       .usc_common_size =
-         DIV_ROUND_UP(const_shared_reg_count,
+         DIV_ROUND_UP(const_shared_regs,
                       PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)),
 
       .local_size = { 1, 1, 1 },
@@ -3011,7 +3020,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
    /* We don't need to pad the workgroup size. */
 
    info.max_instances =
-      pvr_compute_flat_slot_size(pdevice, const_shared_reg_count, false, 1U);
+      pvr_compute_flat_slot_size(pdevice, const_shared_regs, false, 1U);
 
    pvr_compute_generate_control_stream(csb, sub_cmd, &info);
 }
@@ -3022,6 +3031,7 @@ void pvr_compute_update_shared_private(
    struct pvr_private_compute_pipeline *pipeline)
 {
    const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    const uint32_t const_shared_regs = pipeline->const_shared_regs_count;
    struct pvr_csb *csb = &sub_cmd->control_stream;
    struct pvr_compute_kernel_info info;
@@ -3030,6 +3040,9 @@ void pvr_compute_update_shared_private(
    if (!const_shared_regs)
       return;
 
+   /* See comment in pvr_compute_update_shared() for details on this. */
+   state->max_shared_regs = MAX2(state->max_shared_regs, const_shared_regs);
+
    info = (struct pvr_compute_kernel_info){
       .indirect_buffer_addr = PVR_DEV_ADDR_INVALID,
       .usc_common_size =