v3dv: add the concept of a job
authorIago Toral Quiroga <itoral@igalia.com>
Wed, 8 Jan 2020 10:14:35 +0000 (11:14 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 13 Oct 2020 21:21:26 +0000 (21:21 +0000)
As we make progress towards more complex submissions we will need to split
our command buffers into smaller executable units (jobs) that we can
submit indepdently to the kernel. This will be required to implement
pipeline barriers, split subpasses that have depedencies on previous
subpasses, split render passes that use more than 4 render targets, etc.

For now we keep things simple and we only keep one job as current
recording target in the command buffer, and we generate a new one
with every subpass or with any commands we see outside of a render pass
(only vkCmdCopyImageToBuffer for now). In the future we probably want to
optimize this by merging subpasses into the same job when possible,
etc.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

src/broadcom/vulkan/v3dv_cl.c
src/broadcom/vulkan/v3dv_cl.h
src/broadcom/vulkan/v3dv_cmd_buffer.c
src/broadcom/vulkan/v3dv_meta_copy.c
src/broadcom/vulkan/v3dv_private.h
src/broadcom/vulkan/v3dv_queue.c
src/broadcom/vulkan/v3dv_uniforms.c

index d3494c5..e20e673 100644 (file)
 #include "broadcom/cle/v3dx_pack.h"
 
 void
-v3dv_cl_init(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_cl *cl)
+v3dv_cl_init(struct v3dv_job *job, struct v3dv_cl *cl)
 {
    cl->base = NULL;
    cl->next = cl->base;
    cl->bo = NULL;
    cl->size = 0;
-   cl->cmd_buffer = cmd_buffer;
+   cl->job = job;
 }
 
 void
 v3dv_cl_begin(struct v3dv_cl *cl)
 {
-   assert(!cl->cmd_buffer ||
-          cl->cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED);
    assert(v3dv_cl_offset(cl) == 0);
 }
 
@@ -48,15 +46,15 @@ v3dv_cl_reset(struct v3dv_cl *cl)
    /* FIXME: consider keeping the BO when the command buffer is reset with
     * flag VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT.
     */
-   v3dv_cl_init(cl->cmd_buffer, cl);
+   v3dv_cl_init(cl->job, cl);
 }
 
 void
 v3dv_cl_destroy(struct v3dv_cl *cl)
 {
    if (cl->bo) {
-      assert(cl->cmd_buffer);
-      v3dv_bo_free(cl->cmd_buffer->device, cl->bo);
+      assert(cl->job);
+      v3dv_bo_free(cl->job->cmd_buffer->device, cl->bo);
    }
 
    /* Leave the CL in a reset state to catch use after destroy instances */
@@ -73,15 +71,15 @@ v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment)
       return offset;
    }
 
-   struct v3dv_bo *bo = v3dv_bo_alloc(cl->cmd_buffer->device, space);
+   struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->cmd_buffer->device, space);
    if (!bo) {
       fprintf(stderr, "failed to allocate memory for command list");
       abort();
    }
 
-   v3dv_cmd_buffer_add_bo(cl->cmd_buffer, bo);
+   v3dv_job_add_bo(cl->job, bo);
 
-   bool ok = v3dv_bo_map(cl->cmd_buffer->device, bo, bo->size);
+   bool ok = v3dv_bo_map(cl->job->cmd_buffer->device, bo, bo->size);
    if (!ok) {
       fprintf(stderr, "failed to map command list buffer");
       abort();
@@ -102,7 +100,7 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
    if (v3dv_cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
       return;
 
-   struct v3dv_bo *bo = v3dv_bo_alloc(cl->cmd_buffer->device, space);
+   struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->cmd_buffer->device, space);
    if (!bo) {
       fprintf(stderr, "failed to allocate memory for command list");
       abort();
@@ -115,9 +113,9 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
       }
    }
 
-   v3dv_cmd_buffer_add_bo(cl->cmd_buffer, bo);
+   v3dv_job_add_bo(cl->job, bo);
 
-   bool ok = v3dv_bo_map(cl->cmd_buffer->device, bo, bo->size);
+   bool ok = v3dv_bo_map(cl->job->cmd_buffer->device, bo, bo->size);
    if (!ok) {
       fprintf(stderr, "failed to map command list buffer");
       abort();
index f58b2d5..c95110f 100644 (file)
 #include "broadcom/cle/v3d_packet_helpers.h"
 
 struct v3dv_bo;
-struct v3dv_cmd_buffer;
+struct v3dv_job;
 struct v3dv_cl;
 
-void v3dv_cmd_buffer_add_bo(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_bo *bo);
+void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
 
 /**
  * Undefined structure, used for typechecking that you're passing the pointers
@@ -46,7 +46,7 @@ struct v3dv_cl_reloc {
 
 struct v3dv_cl {
    void *base;
-   struct v3dv_cmd_buffer *cmd_buffer;
+   struct v3dv_job *job;
    struct v3dv_cl_out *next;
    struct v3dv_bo *bo;
    uint32_t size;
@@ -82,7 +82,7 @@ v3dv_cl_get_address(struct v3dv_cl *cl)
    return (struct v3dv_cl_reloc){ .bo = cl->bo, .offset = v3dv_cl_offset(cl) };
 }
 
-void v3dv_cl_init(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_cl *cl);
+void v3dv_cl_init(struct v3dv_job *job, struct v3dv_cl *cl);
 void v3dv_cl_begin(struct v3dv_cl *cl);
 void v3dv_cl_reset(struct v3dv_cl *cl);
 void v3dv_cl_destroy(struct v3dv_cl *cl);
@@ -167,7 +167,7 @@ static inline void
 cl_pack_emit_reloc(struct v3dv_cl *cl, const struct v3dv_cl_reloc *reloc)
 {
         if (reloc->bo)
-                v3dv_cmd_buffer_add_bo(cl->cmd_buffer, reloc->bo);
+                v3dv_job_add_bo(cl->job, reloc->bo);
 }
 
 #endif /* V3DV_CL_H */
index ca302db..caf2d2a 100644 (file)
@@ -35,16 +35,16 @@ const struct v3dv_dynamic_state default_dynamic_state = {
 };
 
 void
-v3dv_cmd_buffer_add_bo(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_bo *bo)
+v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
 {
    if (!bo)
       return;
 
-   if (_mesa_set_search(cmd_buffer->bos, bo))
+   if (_mesa_set_search(job->bos, bo))
       return;
 
-   _mesa_set_add(cmd_buffer->bos, bo);
-   cmd_buffer->bo_count++;
+   _mesa_set_add(job->bos, bo);
+   job->bo_count++;
 }
 
 VkResult
@@ -94,13 +94,7 @@ cmd_buffer_create(struct v3dv_device *device,
    cmd_buffer->level = level;
    cmd_buffer->usage_flags = 0;
 
-   cmd_buffer->bos =
-      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
-   cmd_buffer->bo_count = 0;
-
-   v3dv_cl_init(cmd_buffer, &cmd_buffer->bcl);
-   v3dv_cl_init(cmd_buffer, &cmd_buffer->rcl);
-   v3dv_cl_init(cmd_buffer, &cmd_buffer->indirect);
+   list_inithead(&cmd_buffer->submit_jobs);
 
    cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_NEW;
 
@@ -113,48 +107,114 @@ cmd_buffer_create(struct v3dv_device *device,
 }
 
 static void
-cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
+job_destroy(struct v3dv_job *job)
 {
-   list_del(&cmd_buffer->pool_link);
+   assert(job);
+
+   list_del(&job->list_link);
 
-   v3dv_cl_destroy(&cmd_buffer->bcl);
-   v3dv_cl_destroy(&cmd_buffer->rcl);
-   v3dv_cl_destroy(&cmd_buffer->indirect);
+   v3dv_cl_destroy(&job->bcl);
+   v3dv_cl_destroy(&job->rcl);
+   v3dv_cl_destroy(&job->indirect);
 
    /* Since we don't ref BOs, when we add them to the command buffer, don't
     * unref them here either.
     */
 #if 0
-   set_foreach(cmd_buffer->bos, entry) {
+   set_foreach(job->bos, entry) {
       struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
       v3dv_bo_free(cmd_buffer->device, bo);
    }
 #endif
-   _mesa_set_destroy(cmd_buffer->bos, NULL);
+   _mesa_set_destroy(job->bos, NULL);
+
+   v3dv_bo_free(job->cmd_buffer->device, job->tile_alloc);
+   v3dv_bo_free(job->cmd_buffer->device, job->tile_state);
+}
+
+static void
+cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   list_del(&cmd_buffer->pool_link);
 
-   v3dv_bo_free(cmd_buffer->device, cmd_buffer->tile_alloc);
-   v3dv_bo_free(cmd_buffer->device, cmd_buffer->tile_state);
+   list_for_each_entry_safe(struct v3dv_job, job,
+                            &cmd_buffer->submit_jobs, list_link) {
+      job_destroy(job);
+   }
+
+   if (cmd_buffer->state.job)
+      job_destroy(cmd_buffer->state.job);
 
    vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
 }
 
+static void
+emit_binning_flush(struct v3dv_job *job)
+{
+   assert(job);
+   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH));
+   cl_emit(&job->bcl, FLUSH, flush);
+}
+
+void
+v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+   assert(v3dv_cl_offset(&job->bcl) != 0);
+
+   list_addtail(&job->list_link, &cmd_buffer->submit_jobs);
+   cmd_buffer->state.job = NULL;
+}
+
+struct v3dv_job *
+v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   /* Ensure we are not starting a new job without finishing a previous one */
+   if (cmd_buffer->state.job != NULL) {
+      emit_binning_flush(cmd_buffer->state.job);
+      v3dv_cmd_buffer_finish_job(cmd_buffer);
+   }
+
+   assert(cmd_buffer->state.job == NULL);
+   struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->alloc,
+                                    sizeof(struct v3dv_job), 8,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   assert(job);
+
+   job->cmd_buffer = cmd_buffer;
+
+   job->bos =
+      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   job->bo_count = 0;
+
+   v3dv_cl_init(job, &job->bcl);
+   v3dv_cl_begin(&job->bcl);
+
+   v3dv_cl_init(job, &job->rcl);
+   v3dv_cl_begin(&job->rcl);
+
+   v3dv_cl_init(job, &job->indirect);
+   v3dv_cl_begin(&job->indirect);
+
+   cmd_buffer->state.job = job;
+   return job;
+}
+
 static VkResult
 cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer)
 {
    if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
-      cmd_buffer->usage_flags = 0;
+      /* FIXME */
+      assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_NEW);
 
-      _mesa_set_clear(cmd_buffer->bos, NULL);
-      cmd_buffer->bo_count = 0;
-
-      v3dv_cl_reset(&cmd_buffer->bcl);
-      v3dv_cl_reset(&cmd_buffer->rcl);
-      v3dv_cl_reset(&cmd_buffer->indirect);
+      cmd_buffer->usage_flags = 0;
 
       struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
       state->pass = NULL;
       state->framebuffer = NULL;
       state->subpass_idx = 0;
+      state->job = NULL;
 
       cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_INITIALIZED;
    }
@@ -248,19 +308,16 @@ v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
 
    cmd_buffer->usage_flags = pBeginInfo->flags;
 
-   v3dv_cl_begin(&cmd_buffer->bcl);
-   v3dv_cl_begin(&cmd_buffer->rcl);
-   v3dv_cl_begin(&cmd_buffer->indirect);
-
    cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING;
 
    return VK_SUCCESS;
 }
 
 static void
-emit_clip_window(struct v3dv_cmd_buffer *cmd_buffer, VkRect2D *rect)
+emit_clip_window(struct v3dv_job *job, const VkRect2D *rect)
 {
-   cl_emit(&cmd_buffer->bcl, CLIP_WINDOW, clip) {
+   assert(job);
+   cl_emit(&job->bcl, CLIP_WINDOW, clip) {
       clip.clip_window_left_pixel_coordinate = rect->offset.x;
       clip.clip_window_bottom_pixel_coordinate = rect->offset.y;
       clip.clip_window_width_in_pixels = rect->extent.width;
@@ -349,90 +406,12 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
                                      pRenderPassBegin->clearValueCount,
                                      pRenderPassBegin->pClearValues);
 
-   v3dv_cl_ensure_space_with_branch(&cmd_buffer->bcl, 256);
-
-   /* The PTB will request the tile alloc initial size per tile at start
-    * of tile binning.
-    */
-   const uint32_t fb_layers = 1; /* FIXME */
-   uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) *
-                              framebuffer->draw_tiles_x *
-                              framebuffer->draw_tiles_y;
-
-   /* The PTB allocates in aligned 4k chunks after the initial setup. */
-   tile_alloc_size = align(tile_alloc_size, 4096);
-
-   /* Include the first two chunk allocations that the PTB does so that
-    * we definitely clear the OOM condition before triggering one (the HW
-    * won't trigger OOM during the first allocations).
-    */
-   tile_alloc_size += 8192;
-
-   /* For performance, allocate some extra initial memory after the PTB's
-    * minimal allocations, so that we hopefully don't have to block the
-    * GPU on the kernel handling an OOM signal.
-    */
-   tile_alloc_size += 512 * 1024;
-
-   cmd_buffer->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
-   v3dv_cmd_buffer_add_bo(cmd_buffer, cmd_buffer->tile_alloc);
-
-   const uint32_t tsda_per_tile_size = 256;
-   const uint32_t tile_state_size = MAX2(fb_layers, 1) *
-                                    framebuffer->draw_tiles_x *
-                                    framebuffer->draw_tiles_y *
-                                    tsda_per_tile_size;
-   cmd_buffer->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
-   v3dv_cmd_buffer_add_bo(cmd_buffer, cmd_buffer->tile_state);
-
-   /* This must go before the binning mode configuration. It is
-    * required for layered framebuffers to work.
-    */
-   if (fb_layers > 0) {
-      cl_emit(&cmd_buffer->bcl, NUMBER_OF_LAYERS, config) {
-         config.number_of_layers = fb_layers;
-      }
-   }
-
-   cl_emit(&cmd_buffer->bcl, TILE_BINNING_MODE_CFG, config) {
-      config.width_in_pixels = framebuffer->width;
-      config.height_in_pixels = framebuffer->height;
-      config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1);
-      config.multisample_mode_4x = false; /* FIXME */
-      config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
-   }
-
-   /* There's definitely nothing in the VCD cache we want. */
-   cl_emit(&cmd_buffer->bcl, FLUSH_VCD_CACHE, bin);
-
-   /* Disable any leftover OQ state from another job. */
-   cl_emit(&cmd_buffer->bcl, OCCLUSION_QUERY_COUNTER, counter);
-
-   /* "Binning mode lists must have a Start Tile Binning item (6) after
-    *  any prefix state data before the binning list proper starts."
-    */
-   cl_emit(&cmd_buffer->bcl, START_TILE_BINNING, bin);
-
    /* FIXME: probably need to align the render area to tile boundaries since
     *        the tile clears will render full tiles anyway.
     *        See vkGetRenderAreaGranularity().
     */
    state->render_area = pRenderPassBegin->renderArea;
 
-   /* If we don't have a scissor or viewport defined let's just use the render
-    * area as clip_window, as that would be required for a clear in any
-    * case. If we have that, it would be emitted as part of the pipeline
-    * dynamic state flush
-    *
-    * FIXME: this is mostly just needed for clear. radv has dedicated paths
-    * for them, so we could get that idea. In any case, need to revisit if
-    * this is the place to emit the clip window.
-    */
-   if (cmd_buffer->state.dynamic.scissor.count == 0 &&
-       cmd_buffer->state.dynamic.viewport.count == 0) {
-      emit_clip_window(cmd_buffer, &state->render_area);
-   }
-
    /* Setup for first subpass */
    state->subpass_idx = 0;
 }
@@ -627,10 +606,13 @@ emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
 static void
 emit_generic_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
 {
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
    /* Emit the generic list in our indirect state -- the rcl will just
     * have pointers into it.
     */
-   struct v3dv_cl *cl = &cmd_buffer->indirect;
+   struct v3dv_cl *cl = &job->indirect;
    v3dv_cl_ensure_space(cl, 200, 1);
    struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
 
@@ -653,7 +635,7 @@ emit_generic_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
 
    cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
 
-   cl_emit(&cmd_buffer->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
       branch.start = tile_list_start;
       branch.end = v3dv_cl_get_address(cl);
    }
@@ -665,7 +647,8 @@ emit_render_layer(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
    const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
    const struct v3dv_framebuffer *framebuffer = state->framebuffer;
 
-   struct v3dv_cl *rcl = &cmd_buffer->rcl;
+   struct v3dv_job *job = cmd_buffer->state.job;
+   struct v3dv_cl *rcl = &job->rcl;
 
    /* If doing multicore binning, we would need to initialize each
     * core's tile list here.
@@ -673,7 +656,7 @@ emit_render_layer(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
    const uint32_t tile_alloc_offset =
       64 * layer * framebuffer->draw_tiles_x * framebuffer->draw_tiles_y;
    cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
-      list.address = v3dv_cl_address(cmd_buffer->tile_alloc, tile_alloc_offset);
+      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
    }
 
    cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
@@ -758,10 +741,13 @@ emit_render_layer(struct v3dv_cmd_buffer *cmd_buffer, uint32_t layer)
 static void
 emit_rcl(struct v3dv_cmd_buffer *cmd_buffer)
 {
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
    /* FIXME */
    const uint32_t fb_layers = 1;
 
-   v3dv_cl_ensure_space_with_branch(&cmd_buffer->rcl, 200 +
+   v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
                                     MAX2(fb_layers, 1) * 256 *
                                     cl_packet_length(SUPERTILE_COORDINATES));
 
@@ -772,7 +758,7 @@ emit_rcl(struct v3dv_cmd_buffer *cmd_buffer)
    const struct v3dv_subpass *subpass =
       &state->pass->subpasses[state->subpass_idx];
 
-   struct v3dv_cl *rcl = &cmd_buffer->rcl;
+   struct v3dv_cl *rcl = &job->rcl;
 
    /* Comon config must be the first TILE_RENDERING_MODE_CFG and
     * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
@@ -892,7 +878,7 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
    for (uint32_t i = 0; i < subpass->color_count; i++) {
       uint32_t rp_attachment_idx = subpass->color_attachments[i].attachment;
       const struct v3dv_render_pass_attachment *attachment =
-         &cmd_buffer->state.pass->attachments[rp_attachment_idx];
+         &state->pass->attachments[rp_attachment_idx];
 
       /* FIXME: if a previous subpass has alredy computed the hw clear color
        *        for this attachment we could skip this. We can just flag this
@@ -904,7 +890,7 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
 
       const uint32_t sp_attachment_idx = i;
       const struct v3dv_image_view *iview =
-         cmd_buffer->state.framebuffer->attachments[sp_attachment_idx];
+         state->framebuffer->attachments[sp_attachment_idx];
 
       assert((iview->aspects &
               (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) == 0);
@@ -917,20 +903,113 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer)
                                                      clear_color);
       }
    }
+
+   /* FIXME: for now, each subpass goes into a separate job. In the future we
+    * might be able to merge subpasses that render to the same render targets
+    * so long as they don't render to more than 4 color attachments and there
+    * aren't other subpass dependencies preventing this.
+    */
+   struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer);
+
+   const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+
+   /* Setup binning for this subpass.
+    *
+    * FIXME: For now we do this at the start each subpass but if we implement
+    * subpass merges in the future we would only want to emit this once per job.
+    */
+   v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
+
+   /* The PTB will request the tile alloc initial size per tile at start
+    * of tile binning.
+    */
+   const uint32_t fb_layers = 1; /* FIXME */
+   uint32_t tile_alloc_size = 64 * MAX2(fb_layers, 1) *
+                              framebuffer->draw_tiles_x *
+                              framebuffer->draw_tiles_y;
+
+   /* The PTB allocates in aligned 4k chunks after the initial setup. */
+   tile_alloc_size = align(tile_alloc_size, 4096);
+
+   /* Include the first two chunk allocations that the PTB does so that
+    * we definitely clear the OOM condition before triggering one (the HW
+    * won't trigger OOM during the first allocations).
+    */
+   tile_alloc_size += 8192;
+
+   /* For performance, allocate some extra initial memory after the PTB's
+    * minimal allocations, so that we hopefully don't have to block the
+    * GPU on the kernel handling an OOM signal.
+    */
+   tile_alloc_size += 512 * 1024;
+
+   job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
+   v3dv_job_add_bo(job, job->tile_alloc);
+
+   const uint32_t tsda_per_tile_size = 256;
+   const uint32_t tile_state_size = MAX2(fb_layers, 1) *
+                                    framebuffer->draw_tiles_x *
+                                    framebuffer->draw_tiles_y *
+                                    tsda_per_tile_size;
+   job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
+   v3dv_job_add_bo(job, job->tile_state);
+
+   /* This must go before the binning mode configuration. It is
+    * required for layered framebuffers to work.
+    */
+   if (fb_layers > 0) {
+      cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
+         config.number_of_layers = fb_layers;
+      }
+   }
+
+   cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
+      config.width_in_pixels = framebuffer->width;
+      config.height_in_pixels = framebuffer->height;
+      config.number_of_render_targets = MAX2(framebuffer->attachment_count, 1);
+      config.multisample_mode_4x = false; /* FIXME */
+      config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
+   }
+
+   /* There's definitely nothing in the VCD cache we want. */
+   cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+
+   /* Disable any leftover OQ state from another job. */
+   cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+
+   /* "Binning mode lists must have a Start Tile Binning item (6) after
+    *  any prefix state data before the binning list proper starts."
+    */
+   cl_emit(&job->bcl, START_TILE_BINNING, bin);
+
+   /* If we don't have a scissor or viewport defined let's just use the render
+    * area as clip_window, as that would be required for a clear in any
+    * case. If we have that, it would be emitted as part of the pipeline
+    * dynamic state flush
+    *
+    * FIXME: this is mostly just needed for clear. radv has dedicated paths
+    * for them, so we could get that idea. In any case, need to revisit if
+    * this is the place to emit the clip window.
+    */
+   if (cmd_buffer->state.dynamic.scissor.count == 0 &&
+       cmd_buffer->state.dynamic.viewport.count == 0) {
+      emit_clip_window(job, &state->render_area);
+   }
 }
 
 static void
 subpass_finish(struct v3dv_cmd_buffer *cmd_buffer)
 {
-   v3dv_cl_ensure_space_with_branch(&cmd_buffer->bcl, cl_packet_length(FLUSH));
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
 
-   /* We need to emit a flush between binning jobs, so do this before we start
-    * recording the next subpass.
+   /* This finishes the binning job.
     *
     * FIXME: if the next subpass draws to the same RTs, we could skip this
     * and the binning setup for the next subpass.
     */
-   cl_emit(&cmd_buffer->bcl, FLUSH, flush);
+   emit_binning_flush(job);
+   v3dv_cmd_buffer_finish_job(cmd_buffer);
 }
 
 static void
@@ -961,11 +1040,18 @@ v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer)
 {
    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   if (v3dv_cl_offset(&cmd_buffer->bcl) == 0)
-      return VK_SUCCESS; /* FIXME? */
-
    cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_EXECUTABLE;
 
+   struct v3dv_job *job = cmd_buffer->state.job;
+   if (!job)
+      return VK_SUCCESS;
+
+   /* We get here if we recorded commands after the last render pass in the
+    * command buffer. Make sure we finish this last job. */
+   assert(v3dv_cl_offset(&job->bcl) != 0);
+   emit_binning_flush(job);
+   v3dv_cmd_buffer_finish_job(cmd_buffer);
+
    return VK_SUCCESS;
 }
 
@@ -1028,11 +1114,11 @@ v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
 
       /* FIXME: is here the best moment to do that? or when drawing? */
       if (pipeline->vs->assembly_bo)
-         v3dv_cmd_buffer_add_bo(cmd_buffer, pipeline->vs->assembly_bo);
+         v3dv_job_add_bo(cmd_buffer->state.job, pipeline->vs->assembly_bo);
       if (pipeline->vs_bin->assembly_bo)
-         v3dv_cmd_buffer_add_bo(cmd_buffer, pipeline->vs_bin->assembly_bo);
+         v3dv_job_add_bo(cmd_buffer->state.job, pipeline->vs_bin->assembly_bo);
       if (pipeline->fs->assembly_bo)
-         v3dv_cmd_buffer_add_bo(cmd_buffer, pipeline->fs->assembly_bo);
+         v3dv_job_add_bo(cmd_buffer->state.job, pipeline->fs->assembly_bo);
 
       cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE;
       break;
@@ -1181,7 +1267,7 @@ emit_scissor(struct v3dv_cmd_buffer *cmd_buffer)
    clip_window.extent.width = maxx - minx;
    clip_window.extent.height = maxy - miny;
 
-   emit_clip_window(cmd_buffer, &clip_window);
+   emit_clip_window(cmd_buffer->state.job, &clip_window);
 }
 
 static void
@@ -1194,23 +1280,26 @@ emit_viewport(struct v3dv_cmd_buffer *cmd_buffer)
    float *vptranslate = dynamic->viewport.translate[0];
    float *vpscale = dynamic->viewport.scale[0];
 
-   cl_emit(&cmd_buffer->bcl, CLIPPER_XY_SCALING, clip) {
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
+   cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
       clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
       clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
    }
 
-   cl_emit(&cmd_buffer->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+   cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
       clip.viewport_z_offset_zc_to_zs = vptranslate[2];
       clip.viewport_z_scale_zc_to_zs = vpscale[2];
    }
-   cl_emit(&cmd_buffer->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+   cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
       float z1 = (vptranslate[2] - vpscale[2]);
       float z2 = (vptranslate[2] + vpscale[2]);
       clip.minimum_zw = MIN2(z1, z2);
       clip.maximum_zw = MAX2(z1, z2);
    }
 
-   cl_emit(&cmd_buffer->bcl, VIEWPORT_OFFSET, vp) {
+   cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
       vp.viewport_centre_x_coordinate = vptranslate[0];
       vp.viewport_centre_y_coordinate = vptranslate[1];
    }
@@ -1233,9 +1322,11 @@ struct vpm_config {
 static void
 cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
 {
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
    struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
    struct v3dv_pipeline *pipeline = state->pipeline;
-
    assert(pipeline);
 
    /* Upload the uniforms to the indirect CL first */
@@ -1249,9 +1340,9 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
       v3dv_write_uniforms(cmd_buffer, pipeline->vs_bin);
 
    /* Update the cache dirty flag based on the shader progs data */
-   state->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl;
-   state->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
-   state->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
+   job->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl;
+   job->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
+   job->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
 
    /* FIXME: fake vtx->num_elements, that is the vertex state that includes
     * data from the buffers used on the vertex. Such info is still not
@@ -1267,7 +1358,7 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
    uint32_t num_elements_to_emit = MAX2(vtx_num_elements, 1);
 
    uint32_t shader_rec_offset =
-      v3dv_cl_ensure_space(&cmd_buffer->indirect,
+      v3dv_cl_ensure_space(&job->indirect,
                            cl_packet_length(GL_SHADER_STATE_RECORD) +
                            num_elements_to_emit *
                            cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
@@ -1286,7 +1377,7 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
    vpm_cfg.Ve = 0;
    vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size;
 
-   cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_RECORD, shader) {
+   cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
       shader.enable_clipping = true;
 
       shader.point_size_in_shaded_vertex_data =
@@ -1400,9 +1491,9 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
        * by CS and VS.  If we have no attributes being consumed by
        * the shader, set up a dummy to be loaded into the VPM.
        */
-      cl_emit(&cmd_buffer->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+      cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
          /* Valid address of data whose value will be unused. */
-         attr.address = v3dv_cl_address(cmd_buffer->indirect.bo, 0);
+         attr.address = v3dv_cl_address(job->indirect.bo, 0);
 
          attr.type = ATTRIBUTE_FLOAT;
          attr.stride = 0;
@@ -1413,13 +1504,13 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
       }
    }
 
-   cl_emit(&cmd_buffer->bcl, VCM_CACHE_SIZE, vcm) {
+   cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
       vcm.number_of_16_vertex_batches_for_binning = vpm_cfg_bin.Vc;
       vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc;
    }
 
-   cl_emit(&cmd_buffer->bcl, GL_SHADER_STATE, state) {
-      state.address = v3dv_cl_address(cmd_buffer->indirect.bo,
+   cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+      state.address = v3dv_cl_address(job->indirect.bo,
                                       shader_rec_offset);
       state.number_of_attribute_arrays = num_elements_to_emit;
    }
@@ -1462,6 +1553,9 @@ static void
 cmd_buffer_emit_draw_packets(struct v3dv_cmd_buffer *cmd_buffer,
                              struct v3dv_draw_info *info)
 {
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
    struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
    struct v3dv_pipeline *pipeline = state->pipeline;
 
@@ -1473,7 +1567,7 @@ cmd_buffer_emit_draw_packets(struct v3dv_cmd_buffer *cmd_buffer,
    /* FIXME: using VERTEX_ARRAY_PRIMS always as it fits our test caselist
     * right now. Need to be choosen based on the current case.
     */
-   cl_emit(&cmd_buffer->bcl, VERTEX_ARRAY_PRIMS, prim) {
+   cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
       prim.mode = hw_prim_type | prim_tf_enable;
       prim.length = info->vertex_count;
       prim.index_of_first_vertex = info->first_vertex;
index a7728a2..fc55be8 100644 (file)
@@ -27,8 +27,7 @@
 #include "vk_format_info.h"
 
 static void
-emit_image_loads(struct v3dv_cmd_buffer *cmd_buffer,
-                 struct v3dv_cl *cl,
+emit_image_loads(struct v3dv_cl *cl,
                  struct v3dv_image *image,
                  uint32_t layer,
                  uint32_t mip_level)
@@ -67,8 +66,7 @@ emit_image_loads(struct v3dv_cmd_buffer *cmd_buffer,
 }
 
 static void
-emit_buffer_stores(struct v3dv_cmd_buffer *cmd_buffer,
-                   struct v3dv_cl *cl,
+emit_buffer_stores(struct v3dv_cl *cl,
                    struct v3dv_buffer *buffer,
                    struct v3dv_image *image,
                    uint32_t buffer_offset,
@@ -92,13 +90,13 @@ emit_buffer_stores(struct v3dv_cmd_buffer *cmd_buffer,
 }
 
 static void
-emit_copy_layer_to_buffer_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_image *image,
                                         uint32_t layer,
                                         const VkBufferImageCopy *region)
 {
-   struct v3dv_cl *cl = &cmd_buffer->indirect;
+   struct v3dv_cl *cl = &job->indirect;
    v3dv_cl_ensure_space(cl, 200, 1);
    struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
 
@@ -108,8 +106,7 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer,
    assert(layer < imgrsc->layerCount);
 
    /* Load image to TLB */
-   emit_image_loads(cmd_buffer, cl, image,
-                    imgrsc->baseArrayLayer + layer, imgrsc->mipLevel);
+   emit_image_loads(cl, image, imgrsc->baseArrayLayer + layer, imgrsc->mipLevel);
 
    cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
       fmt.primitive_type = LIST_TRIANGLES;
@@ -130,21 +127,20 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_cmd_buffer *cmd_buffer,
    uint32_t buffer_stride = width * image->cpp;
    uint32_t buffer_offset =
       region->bufferOffset + height * buffer_stride * layer;
-   emit_buffer_stores(cmd_buffer, cl, buffer, image,
-                      buffer_offset, buffer_stride);
+   emit_buffer_stores(cl, buffer, image, buffer_offset, buffer_stride);
 
    cl_emit(cl, END_OF_TILE_MARKER, end);
 
    cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
 
-   cl_emit(&cmd_buffer->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
       branch.start = tile_list_start;
       branch.end = v3dv_cl_get_address(cl);
    }
 }
 
 static void
-emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_layer_to_buffer(struct v3dv_job *job,
                           uint32_t min_x_supertile,
                           uint32_t min_y_supertile,
                           uint32_t max_x_supertile,
@@ -155,12 +151,12 @@ emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                           uint32_t layer,
                           const VkBufferImageCopy *region)
 {
-   struct v3dv_cl *rcl = &cmd_buffer->rcl;
+   struct v3dv_cl *rcl = &job->rcl;
 
    const uint32_t tile_alloc_offset =
       64 * layer * framebuffer->draw_tiles_x * framebuffer->draw_tiles_y;
    cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
-      list.address = v3dv_cl_address(cmd_buffer->tile_alloc, tile_alloc_offset);
+      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
    }
 
    cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
@@ -189,8 +185,7 @@ emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
 
    cl_emit(rcl, FLUSH_VCD_CACHE, flush);
 
-   emit_copy_layer_to_buffer_per_tile_list(cmd_buffer, buffer, image,
-                                           layer, region);
+   emit_copy_layer_to_buffer_per_tile_list(job, buffer, image, layer, region);
 
    for (int y = min_y_supertile; y <= max_y_supertile; y++) {
       for (int x = min_x_supertile; x <= max_x_supertile; x++) {
@@ -203,7 +198,7 @@ emit_copy_layer_to_buffer(struct v3dv_cmd_buffer *cmd_buffer,
 }
 
 static void
-emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
                               struct v3dv_buffer *buffer,
                               struct v3dv_image *image,
                               struct v3dv_framebuffer *framebuffer,
@@ -212,7 +207,7 @@ emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
 {
    const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
 
-   struct v3dv_cl *rcl = &cmd_buffer->rcl;
+   struct v3dv_cl *rcl = &job->rcl;
    v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                     imgrsc->layerCount * 256 *
                                     cl_packet_length(SUPERTILE_COORDINATES));
@@ -263,7 +258,7 @@ emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
    const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
 
    for (int layer = 0; layer < imgrsc->layerCount; layer++) {
-      emit_copy_layer_to_buffer(cmd_buffer,
+      emit_copy_layer_to_buffer(job,
                                 min_x_supertile, min_y_supertile,
                                 max_x_supertile, max_y_supertile,
                                 buffer, image, framebuffer,
@@ -275,17 +270,17 @@ emit_copy_image_to_buffer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
 }
 
 static void
-emit_copy_image_to_buffer_bcl(struct v3dv_cmd_buffer *cmd_buffer,
+emit_copy_image_to_buffer_bcl(struct v3dv_job *job,
                               struct v3dv_framebuffer *framebuffer,
                               const VkBufferImageCopy *region)
 {
-   v3dv_cl_ensure_space_with_branch(&cmd_buffer->bcl, 256);
+   v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
 
-   cl_emit(&cmd_buffer->bcl, NUMBER_OF_LAYERS, config) {
+   cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
       config.number_of_layers = framebuffer->layers;
    }
 
-   cl_emit(&cmd_buffer->bcl, TILE_BINNING_MODE_CFG, config) {
+   cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
       config.width_in_pixels = framebuffer->width;
       config.height_in_pixels = framebuffer->height;
       config.number_of_render_targets = 1;
@@ -293,20 +288,20 @@ emit_copy_image_to_buffer_bcl(struct v3dv_cmd_buffer *cmd_buffer,
       config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
    }
 
-   cl_emit(&cmd_buffer->bcl, FLUSH_VCD_CACHE, bin);
+   cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
 
-   cl_emit(&cmd_buffer->bcl, OCCLUSION_QUERY_COUNTER, counter);
+   cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
 
-   cl_emit(&cmd_buffer->bcl, START_TILE_BINNING, bin);
+   cl_emit(&job->bcl, START_TILE_BINNING, bin);
 
-   cl_emit(&cmd_buffer->bcl, CLIP_WINDOW, clip) {
+   cl_emit(&job->bcl, CLIP_WINDOW, clip) {
       clip.clip_window_left_pixel_coordinate = region->imageOffset.x;
       clip.clip_window_bottom_pixel_coordinate = region->imageOffset.y;
       clip.clip_window_width_in_pixels = region->imageExtent.width;
       clip.clip_window_height_in_pixels = region->imageExtent.height;
    }
 
-   cl_emit(&cmd_buffer->bcl, FLUSH, flush);
+   cl_emit(&job->bcl, FLUSH, flush);
 }
 
 /* Sets framebuffer dimensions and computes tile size parameters based on the
@@ -365,35 +360,30 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
       struct v3dv_framebuffer framebuffer;
       setup_framebuffer_params(&framebuffer, image, num_layers, internal_bpp);
 
-      /* FIXME: here we assume that we have a valid tile alloc/state setup,
-       *        which is usually the case for copy after render scenarios. The
-       *        code below simply checks and asserts this requirement,
-       *        however, a proper implementation should allocate new tile
-       *        alloc/state if we don't have one (for example if we haven't
-       *        recorded a render pass yet) or the one we have isn't large
-       *        enough. We still need to figure out how we want to handle
-       *        varying tile alloc/state requirements in a command buffer.
-       */
+      struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer);
+
       uint32_t tile_alloc_size = 64 * num_layers *
                                  framebuffer.draw_tiles_x *
                                  framebuffer.draw_tiles_y;
       tile_alloc_size = align(tile_alloc_size, 4096);
       tile_alloc_size += 8192;
       tile_alloc_size += 512 * 1024;
-      assert(cmd_buffer->tile_alloc &&
-             cmd_buffer->tile_alloc->size >= tile_alloc_size);
+      job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size);
+      v3dv_job_add_bo(job, job->tile_alloc);
 
       const uint32_t tsda_per_tile_size = 256;
       const uint32_t tile_state_size = num_layers *
                                        framebuffer.draw_tiles_x *
                                        framebuffer.draw_tiles_y *
                                        tsda_per_tile_size;
-      assert(cmd_buffer->tile_state &&
-             cmd_buffer->tile_state->size >= tile_state_size);
+      job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size);
+      v3dv_job_add_bo(job, job->tile_state);
 
-      emit_copy_image_to_buffer_bcl(cmd_buffer, &framebuffer, region);
-      emit_copy_image_to_buffer_rcl(cmd_buffer, buffer, image,
+      emit_copy_image_to_buffer_bcl(job, &framebuffer, region);
+      emit_copy_image_to_buffer_rcl(job, buffer, image,
                                     &framebuffer, internal_type, region);
+
+      v3dv_cmd_buffer_finish_job(cmd_buffer);
 }
 
 void
index 3ef1d14..d9c698f 100644 (file)
@@ -439,6 +439,30 @@ struct v3dv_dynamic_state {
 
 extern const struct v3dv_dynamic_state default_dynamic_state;
 
+struct v3dv_job {
+   struct list_head list_link;
+
+   struct v3dv_cmd_buffer *cmd_buffer;
+
+   struct v3dv_cl bcl;
+   struct v3dv_cl rcl;
+   struct v3dv_cl indirect;
+
+   /* Set of all BOs referenced by the job. This will be used for making
+    * the list of BOs that the kernel will need to have paged in to
+    * execute our job.
+    */
+   struct set *bos;
+   uint32_t bo_count;
+
+   struct v3dv_bo *tile_alloc;
+   struct v3dv_bo *tile_state;
+
+   bool tmu_dirty_rcl;
+};
+
+void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
+
 struct v3dv_cmd_buffer_state {
    const struct v3dv_render_pass *pass;
    const struct v3dv_framebuffer *framebuffer;
@@ -456,8 +480,8 @@ struct v3dv_cmd_buffer_state {
    struct v3dv_dynamic_state dynamic;
    uint32_t dirty;
 
-   /* FIXME: here? */
-   bool tmu_dirty_rcl;
+   /* Current job being recorded */
+   struct v3dv_job *job;
 };
 
 struct v3dv_cmd_buffer {
@@ -471,26 +495,16 @@ struct v3dv_cmd_buffer {
    VkCommandBufferUsageFlags usage_flags;
    VkCommandBufferLevel level;
 
-   struct v3dv_cl bcl;
-   struct v3dv_cl rcl;
-   struct v3dv_cl indirect;
-
    enum v3dv_cmd_buffer_status status;
 
    struct v3dv_cmd_buffer_state state;
 
-   /* Set of all BOs referenced by the job. This will be used for making
-    * the list of BOs that the kernel will need to have paged in to
-    * execute our job.
-    */
-   struct set *bos;
-   uint32_t bo_count;
-
-   struct v3dv_bo *tile_alloc;
-   struct v3dv_bo *tile_state;
+   /* List of jobs to submit to the kernel */
+   struct list_head submit_jobs;
 };
 
-void v3dv_cmd_buffer_add_bo(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_bo *bo);
+struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer);
+void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
 
 struct v3dv_shader_module {
    unsigned char sha1[20];
index 678bfb4..186c9f0 100644 (file)
 #include <errno.h>
 
 static void
-v3dv_clif_dump(struct v3dv_queue *queue,
-               struct v3dv_cmd_buffer *cmd_buffer,
+v3dv_clif_dump(struct v3dv_device *device,
+               struct v3dv_job *job,
                struct drm_v3d_submit_cl *submit)
 {
    if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
       return;
 
-   struct clif_dump *clif = clif_dump_init(&queue->device->devinfo,
+   struct clif_dump *clif = clif_dump_init(&device->devinfo,
                                            stderr,
                                            V3D_DEBUG & V3D_DEBUG_CL);
 
-   set_foreach(cmd_buffer->bos, entry) {
+   set_foreach(job->bos, entry) {
       struct v3dv_bo *bo = (void *)entry->key;
       char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                    "" /* bo->name */ , bo->offset);
 
-      v3dv_bo_map(queue->device, bo, bo->size);
+      v3dv_bo_map(device, bo, bo->size);
       clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
 
       ralloc_free(name);
@@ -57,17 +57,9 @@ v3dv_clif_dump(struct v3dv_queue *queue,
 }
 
 static VkResult
-queue_submit(struct v3dv_queue *queue,
-             const VkSubmitInfo *pSubmit,
-             VkFence fence)
+job_submit(struct v3dv_job *job)
 {
-   /* FIXME */
-   assert(fence == 0);
-   assert(pSubmit->waitSemaphoreCount == 0);
-   assert(pSubmit->signalSemaphoreCount == 0);
-   assert(pSubmit->commandBufferCount == 1);
-
-   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, pSubmit->pCommandBuffers[0]);
+   assert(job);
 
    struct drm_v3d_submit_cl submit;
 
@@ -79,36 +71,37 @@ queue_submit(struct v3dv_queue *queue,
    /* Update the sync object for the last rendering by our context. */
    submit.out_sync = 0; /* FIXME */
 
-   submit.bcl_start = cmd_buffer->bcl.bo->offset;
-   submit.bcl_end = cmd_buffer->bcl.bo->offset + v3dv_cl_offset(&cmd_buffer->bcl);
-   submit.rcl_start = cmd_buffer->rcl.bo->offset;
-   submit.rcl_end = cmd_buffer->rcl.bo->offset + v3dv_cl_offset(&cmd_buffer->rcl);
+   submit.bcl_start = job->bcl.bo->offset;
+   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
+   submit.rcl_start = job->rcl.bo->offset;
+   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);
 
    submit.flags = 0;
    /* FIXME: we already know that we support cache flushing, since we only
     * support hardware that supports it, but it would be better to query the
     * DRM device for this capability instead.
-   if (cmd_buffer->state.tmu_dirty_rcl)
+   if (job->tmu_dirty_rcl)
       submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
 
-   submit.qma = cmd_buffer->tile_alloc->offset;
-   submit.qms = cmd_buffer->tile_alloc->size;
-   submit.qts = cmd_buffer->tile_state->offset;
+   submit.qma = job->tile_alloc->offset;
+   submit.qms = job->tile_alloc->size;
+   submit.qts = job->tile_state->offset;
 
-   submit.bo_handle_count = cmd_buffer->bo_count;
+   submit.bo_handle_count = job->bo_count;
    uint32_t *bo_handles =
       (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit.bo_handle_count * 2));
    uint32_t bo_idx = 0;
-   set_foreach(cmd_buffer->bos, entry) {
+   set_foreach(job->bos, entry) {
       struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
       bo_handles[bo_idx++] = bo->handle;
    }
    assert(bo_idx == submit.bo_handle_count);
    submit.bo_handles = (uintptr_t)(void *)bo_handles;
 
-   v3dv_clif_dump(queue, cmd_buffer, &submit);
+   struct v3dv_device *device = job->cmd_buffer->device;
+   v3dv_clif_dump(device, job, &submit);
 
-   int ret = v3dv_ioctl(queue->device->fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
+   int ret = v3dv_ioctl(device->fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
    static bool warned = false;
    if (ret && !warned) {
       fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
@@ -124,6 +117,29 @@ queue_submit(struct v3dv_queue *queue,
    return VK_SUCCESS;
 }
 
+static VkResult
+queue_submit(struct v3dv_queue *queue,
+             const VkSubmitInfo *pSubmit,
+             VkFence fence)
+{
+   /* FIXME */
+   assert(fence == 0);
+   assert(pSubmit->waitSemaphoreCount == 0);
+   assert(pSubmit->signalSemaphoreCount == 0);
+   assert(pSubmit->commandBufferCount == 1);
+
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, pSubmit->pCommandBuffers[0]);
+
+   list_for_each_entry_safe(struct v3dv_job, job,
+                            &cmd_buffer->submit_jobs, list_link) {
+      VkResult result = job_submit(job);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   return VK_SUCCESS;
+}
+
 VkResult
 v3dv_QueueSubmit(VkQueue _queue,
                  uint32_t submitCount,
index 109dbe4..0652753 100644 (file)
@@ -34,6 +34,9 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
    struct v3d_uniform_list *uinfo = &p_stage->prog_data.base->uniforms;
    struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
 
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
    /* The hardware always pre-fetches the next uniform (also when there
     * aren't any), so we always allocate space for an extra slot. This
     * fixes MMU exceptions reported since Linux kernel 5.4 when the
@@ -42,13 +45,11 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
     * the last uniform it will read beyond the end of the page and trigger
     * the MMU exception.
     */
-   v3dv_cl_ensure_space(&cmd_buffer->indirect, (uinfo->count + 1) * 4, 4);
+   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);
 
-   struct v3dv_cl_reloc uniform_stream =
-      v3dv_cl_get_address(&cmd_buffer->indirect);
+   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);
 
-   struct v3dv_cl_out *uniforms =
-      cl_start(&cmd_buffer->indirect);
+   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
 
    for (int i = 0; i < uinfo->count; i++) {
       uint32_t data = uinfo->data[i];
@@ -79,7 +80,7 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
       }
    }
 
-   cl_end(&cmd_buffer->indirect, uniforms);
+   cl_end(&job->indirect, uniforms);
 
    return uniform_stream;
 }