panvk: Preload FB attachments when required
author Boris Brezillon <boris.brezillon@collabora.com>
Mon, 6 Sep 2021 14:06:49 +0000 (16:06 +0200)
committer Boris Brezillon <boris.brezillon@collabora.com>
Tue, 21 Sep 2021 13:00:07 +0000 (15:00 +0200)
There are at least three situations where we need to preload FBs:

1. The attachment is flagged VK_ATTACHMENT_LOAD_OP_LOAD and has not been
   accessed in previous subpasses

2. The batch is implicitly split (e.g. too many jobs queued to the
   batch, wait/set events queued, ...)

3. The attachment has been written by a previous subpass

With those changes, we can get rid of panvk_emit_fb() and call
pan_emit_fbd() directly (fb_info is initialized when starting a subpass
and updated when an implicit split happens).

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12095>

src/panfrost/vulkan/panvk_cmd_buffer.c
src/panfrost/vulkan/panvk_pass.c
src/panfrost/vulkan/panvk_private.h
src/panfrost/vulkan/panvk_vX_cmd_buffer.c
src/panfrost/vulkan/panvk_vX_cs.c
src/panfrost/vulkan/panvk_vX_cs.h

index ae13cd4..af0fcf3 100644 (file)
@@ -342,6 +342,75 @@ panvk_cmd_prepare_clear_values(struct panvk_cmd_buffer *cmdbuf,
 }
 
 void
+panvk_cmd_fb_info_set_subpass(struct panvk_cmd_buffer *cmdbuf)
+{
+   const struct panvk_subpass *subpass = cmdbuf->state.subpass;
+   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
+   const struct panvk_framebuffer *fb = cmdbuf->state.framebuffer;
+   const struct panvk_clear_value *clears = cmdbuf->state.clear;
+   struct panvk_image_view *view;
+
+   fbinfo->nr_samples = 1;
+   fbinfo->rt_count = subpass->color_count;
+   memset(&fbinfo->bifrost.pre_post.dcds, 0, sizeof(fbinfo->bifrost.pre_post.dcds));
+
+   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
+      int idx = subpass->color_attachments[cb].idx;
+      view = idx != VK_ATTACHMENT_UNUSED ?
+             fb->attachments[idx].iview : NULL;
+      if (!view)
+         continue;
+      fbinfo->rts[cb].view = &view->pview;
+      fbinfo->rts[cb].clear = subpass->color_attachments[cb].clear;
+      fbinfo->rts[cb].preload = subpass->color_attachments[cb].preload;
+      fbinfo->rts[cb].crc_valid = &cmdbuf->state.fb.crc_valid[cb];
+
+      memcpy(fbinfo->rts[cb].clear_value, clears[idx].color,
+             sizeof(fbinfo->rts[cb].clear_value));
+      fbinfo->nr_samples =
+         MAX2(fbinfo->nr_samples, view->pview.image->layout.nr_samples);
+   }
+
+   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
+      view = fb->attachments[subpass->zs_attachment.idx].iview;
+      const struct util_format_description *fdesc =
+         util_format_description(view->pview.format);
+
+      fbinfo->nr_samples =
+         MAX2(fbinfo->nr_samples, view->pview.image->layout.nr_samples);
+
+      if (util_format_has_depth(fdesc)) {
+         fbinfo->zs.clear.z = subpass->zs_attachment.clear;
+         fbinfo->zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
+         fbinfo->zs.view.zs = &view->pview;
+      }
+
+      if (util_format_has_stencil(fdesc)) {
+         fbinfo->zs.clear.s = subpass->zs_attachment.clear;
+         fbinfo->zs.clear_value.stencil = clears[subpass->zs_attachment.idx].depth;
+         if (!fbinfo->zs.view.zs)
+            fbinfo->zs.view.s = &view->pview;
+      }
+   }
+}
+
+void
+panvk_cmd_fb_info_init(struct panvk_cmd_buffer *cmdbuf)
+{ /* Reset cmdbuf->state.fb (CRC-valid flags and fb info) for the bound framebuffer. */
+   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
+   const struct panvk_framebuffer *fb = cmdbuf->state.framebuffer;
+
+   memset(cmdbuf->state.fb.crc_valid, 0, sizeof(cmdbuf->state.fb.crc_valid));
+
+   *fbinfo = (struct pan_fb_info) { /* members not named below are zero-initialized */
+      .width = fb->width,
+      .height = fb->height,
+      .extent.maxx = fb->width - 1,
+      .extent.maxy = fb->height - 1,
+   };
+}
+
+void
 panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                           const VkRenderPassBeginInfo *pRenderPassBegin,
                           const VkSubpassBeginInfo *pSubpassBeginInfo)
@@ -363,9 +432,10 @@ panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                                    sizeof(*cmdbuf->state.clear) *
                                    pRenderPassBegin->clearValueCount, 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   assert(pRenderPassBegin->clearValueCount == pass->attachment_count);
    panvk_cmd_prepare_clear_values(cmdbuf, pRenderPassBegin->pClearValues);
    memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute));
+   panvk_cmd_fb_info_init(cmdbuf);
+   panvk_cmd_fb_info_set_subpass(cmdbuf);
 }
 
 void
@@ -382,6 +452,29 @@ panvk_CmdBeginRenderPass(VkCommandBuffer cmd,
 }
 
 void
+panvk_cmd_preload_fb_after_batch_split(struct panvk_cmd_buffer *cmdbuf)
+{ /* Switch every bound attachment in state.fb.info from "clear" to "preload". */
+   for (unsigned i = 0; i < cmdbuf->state.fb.info.rt_count; i++) {
+      if (cmdbuf->state.fb.info.rts[i].view) { /* render targets without a view are skipped */
+         cmdbuf->state.fb.info.rts[i].clear = false;
+         cmdbuf->state.fb.info.rts[i].preload = true;
+      }
+   }
+
+   if (cmdbuf->state.fb.info.zs.view.zs) { /* depth is handled only when a zs view is set */
+      cmdbuf->state.fb.info.zs.clear.z = false;
+      cmdbuf->state.fb.info.zs.preload.z = true;
+   }
+
+   if (cmdbuf->state.fb.info.zs.view.s || /* stencil: separate s view, or a zs view with a depth+stencil format */
+       (cmdbuf->state.fb.info.zs.view.zs &&
+        util_format_is_depth_and_stencil(cmdbuf->state.fb.info.zs.view.zs->format))) {
+      cmdbuf->state.fb.info.zs.clear.s = false;
+      cmdbuf->state.fb.info.zs.preload.s = true;
+   }
+}
+
+void
 panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf)
 {
    assert(!cmdbuf->state.batch);
index 82ed878..04d187a 100644 (file)
@@ -79,7 +79,7 @@ panvk_CreateRenderPass2(VkDevice _device,
       att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
       att->store_op = pCreateInfo->pAttachments[i].storeOp;
       att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
-      att->clear_subpass = ~0;
+      att->first_used_in_subpass = ~0;
    }
 
    uint32_t subpass_attachment_count = 0;
@@ -144,9 +144,14 @@ panvk_CreateRenderPass2(VkDevice _device,
 
             if (idx != VK_ATTACHMENT_UNUSED) {
                pass->attachments[idx].view_mask |= subpass->view_mask;
-               if (pass->attachments[idx].clear_subpass == ~0) {
-                  pass->attachments[idx].clear_subpass = i;
-                  subpass->color_attachments[j].clear = true;
+               if (pass->attachments[idx].first_used_in_subpass == ~0) {
+                  pass->attachments[idx].first_used_in_subpass = i;
+                  if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+                     subpass->color_attachments[j].clear = true;
+                  else if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
+                     subpass->color_attachments[j].preload = true;
+               } else {
+                  subpass->color_attachments[j].preload = true;
                }
             }
          }
@@ -176,9 +181,15 @@ panvk_CreateRenderPass2(VkDevice _device,
       if (idx != VK_ATTACHMENT_UNUSED) {
          subpass->zs_attachment.layout = desc->pDepthStencilAttachment->layout;
          pass->attachments[idx].view_mask |= subpass->view_mask;
-         if (pass->attachments[idx].clear_subpass == ~0) {
-            pass->attachments[idx].clear_subpass = i;
-            subpass->zs_attachment.clear = true;
+
+         if (pass->attachments[idx].first_used_in_subpass == ~0) {
+            pass->attachments[idx].first_used_in_subpass = i;
+            if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+               subpass->zs_attachment.clear = true;
+            else if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
+               subpass->zs_attachment.preload = true;
+         } else {
+            subpass->zs_attachment.preload = true;
          }
       }
    }
index bf38e10..964cfb6 100644 (file)
@@ -585,6 +585,11 @@ struct panvk_cmd_state {
       } s_front, s_back;
    } zs;
 
+   struct {
+      struct pan_fb_info info;
+      bool crc_valid[MAX_RTS];
+   } fb;
+
    const struct panvk_render_pass *pass;
    const struct panvk_subpass *subpass;
    const struct panvk_framebuffer *framebuffer;
@@ -650,6 +655,15 @@ void
 panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf);
 
 void
+panvk_cmd_fb_info_set_subpass(struct panvk_cmd_buffer *cmdbuf);
+
+void
+panvk_cmd_fb_info_init(struct panvk_cmd_buffer *cmdbuf);
+
+void
+panvk_cmd_preload_fb_after_batch_split(struct panvk_cmd_buffer *cmdbuf);
+
+void
 panvk_pack_color(struct panvk_clear_value *out,
                  const VkClearColorValue *in,
                  enum pipe_format format);
@@ -918,6 +932,7 @@ struct panvk_subpass_attachment {
    uint32_t idx;
    VkImageLayout layout;
    bool clear;
+   bool preload;
 };
 
 struct panvk_subpass {
@@ -943,7 +958,7 @@ struct panvk_render_pass_attachment {
    VkImageLayout initial_layout;
    VkImageLayout final_layout;
    unsigned view_mask;
-   unsigned clear_subpass;
+   unsigned first_used_in_subpass;
 };
 
 struct panvk_render_pass {
index aa2f2ec..ed070a3 100644 (file)
@@ -147,6 +147,18 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
 
    list_addtail(&cmdbuf->state.batch->node, &cmdbuf->batches);
 
+   if (batch->scoreboard.first_tiler) {
+      struct panfrost_ptr preload_jobs[2];
+      unsigned num_preload_jobs =
+         GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
+                              &cmdbuf->state.fb.info,
+                              PAN_ARCH >= 6 ? batch->tls.gpu : batch->fb.desc.gpu,
+                              PAN_ARCH >= 6 ? batch->tiler.descs.gpu : 0,
+                              preload_jobs);
+      for (unsigned i = 0; i < num_preload_jobs; i++)
+         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
+   }
+
    struct pan_tls_info tlsinfo = { 0 };
 
    if (cmdbuf->state.pipeline) {
@@ -195,13 +207,8 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
 #endif
 
       cmdbuf->state.batch->fb.desc.gpu |=
-         panvk_per_arch(emit_fb)(cmdbuf->device,
-                                 cmdbuf->state.batch,
-                                 cmdbuf->state.subpass,
-                                 cmdbuf->state.framebuffer,
-                                 cmdbuf->state.clear,
-                                 &tlsinfo, &cmdbuf->state.batch->tiler.ctx,
-                                 fbd);
+         GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &tlsinfo,
+                            &cmdbuf->state.batch->tiler.ctx, fbd);
 
 #if PAN_ARCH <= 5
       panvk_copy_fb_desc(cmdbuf, tmp_fbd);
@@ -226,6 +233,7 @@ panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
    panvk_per_arch(cmd_close_batch)(cmdbuf);
 
    cmdbuf->state.subpass++;
+   panvk_cmd_fb_info_set_subpass(cmdbuf);
    panvk_cmd_open_batch(cmdbuf);
    memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute));
 }
@@ -265,6 +273,11 @@ panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
 
    /* Tag the pointer */
    batch->fb.desc.gpu |= tags;
+
+#if PAN_ARCH >= 6
+   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
+          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
+#endif
 }
 
 void
@@ -685,6 +698,7 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
     */
    if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
       panvk_per_arch(cmd_close_batch)(cmdbuf);
+      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
       panvk_cmd_open_batch(cmdbuf);
       batch = cmdbuf->state.batch;
    }
@@ -802,6 +816,7 @@ panvk_per_arch(CmdPipelineBarrier)(VkCommandBuffer commandBuffer,
     */
    if (cmdbuf->state.batch) {
       panvk_per_arch(cmd_close_batch)(cmdbuf);
+      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
       panvk_cmd_open_batch(cmdbuf);
    }
 }
@@ -833,6 +848,7 @@ panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_event_op,
                            op);
       panvk_per_arch(cmd_close_batch)(cmdbuf);
+      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
       panvk_cmd_open_batch(cmdbuf);
    }
 }
@@ -859,6 +875,7 @@ panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
       if (cmdbuf->state.batch->fragment_job ||
           cmdbuf->state.batch->scoreboard.first_job) {
          panvk_per_arch(cmd_close_batch)(cmdbuf);
+         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
          panvk_cmd_open_batch(cmdbuf);
       }
       util_dynarray_append(&cmdbuf->state.batch->event_ops,
index b5dfe8e..e3ae39e 100644 (file)
@@ -858,65 +858,3 @@ panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
    }
 #endif
 }
-
-unsigned
-panvk_per_arch(emit_fb)(const struct panvk_device *dev,
-                        const struct panvk_batch *batch,
-                        const struct panvk_subpass *subpass,
-                        const struct panvk_framebuffer *fb,
-                        const struct panvk_clear_value *clears,
-                        const struct pan_tls_info *tlsinfo,
-                        const struct pan_tiler_context *tilerctx,
-                        void *desc)
-{
-   const struct panfrost_device *pdev = &dev->physical_device->pdev;
-   struct panvk_image_view *view;
-   bool crc_valid[8] = { false };
-   struct pan_fb_info fbinfo = {
-      .width = fb->width,
-      .height = fb->height,
-      .extent.maxx = fb->width - 1,
-      .extent.maxy = fb->height - 1,
-      .nr_samples = 1,
-   };
-
-   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
-      int idx = subpass->color_attachments[cb].idx;
-      view = idx != VK_ATTACHMENT_UNUSED ?
-             fb->attachments[idx].iview : NULL;
-      if (!view)
-         continue;
-      fbinfo.rts[cb].view = &view->pview;
-      fbinfo.rts[cb].clear = subpass->color_attachments[idx].clear;
-      fbinfo.rts[cb].crc_valid = &crc_valid[cb];
-
-      memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
-             sizeof(fbinfo.rts[cb].clear_value));
-      fbinfo.nr_samples =
-         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
-   }
-
-   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
-      view = fb->attachments[subpass->zs_attachment.idx].iview;
-      const struct util_format_description *fdesc =
-         util_format_description(view->pview.format);
-
-      fbinfo.nr_samples =
-         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
-
-      if (util_format_has_depth(fdesc)) {
-         fbinfo.zs.clear.z = subpass->zs_attachment.clear;
-         fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
-         fbinfo.zs.view.zs = &view->pview;
-      }
-
-      if (util_format_has_depth(fdesc)) {
-         fbinfo.zs.clear.s = subpass->zs_attachment.clear;
-         fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].depth;
-         if (!fbinfo.zs.view.zs)
-            fbinfo.zs.view.s = &view->pview;
-      }
-   }
-
-   return GENX(pan_emit_fbd)(pdev, &fbinfo, tlsinfo, tilerctx, desc);
-}
index b8933ce..3158c76 100644 (file)
@@ -125,13 +125,3 @@ void
 panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
                                    unsigned width, unsigned height,
                                    const struct panfrost_ptr *descs);
-
-unsigned
-panvk_per_arch(emit_fb)(const struct panvk_device *dev,
-                        const struct panvk_batch *batch,
-                        const struct panvk_subpass *subpass,
-                        const struct panvk_framebuffer *fb,
-                        const struct panvk_clear_value *clears,
-                        const struct pan_tls_info *tlsinfo,
-                        const struct pan_tiler_context *tilerctx,
-                        void *desc);