v3dv: fix depth/stencil clears on hardware
authorIago Toral Quiroga <itoral@igalia.com>
Thu, 14 May 2020 10:09:57 +0000 (12:09 +0200)
committerMarge Bot <eric+marge@anholt.net>
Tue, 13 Oct 2020 21:21:30 +0000 (21:21 +0000)
There is a hw bug by which the only way to clear the depth/stencil
tile buffers is to emit a clear of all tile buffers, so if we have
to do any such clears, we just emit a single clear of all tile
buffers and skip doing any per-buffer clears, even for color buffers,
since they would be redundant.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

src/broadcom/vulkan/v3dv_cmd_buffer.c

index 9720500..100fd71 100644 (file)
@@ -1259,57 +1259,9 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
       &state->pass->subpasses[state->subpass_idx];
 
    bool has_stores = false;
-   for (uint32_t i = 0; i < subpass->color_count; i++) {
-      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
-
-      if (attachment_idx == VK_ATTACHMENT_UNUSED)
-         continue;
-
-      const struct v3dv_render_pass_attachment *attachment =
-         &state->pass->attachments[attachment_idx];
-
-      assert(state->job->first_subpass >= attachment->first_subpass);
-      assert(state->subpass_idx >= attachment->first_subpass);
-      assert(state->subpass_idx <= attachment->last_subpass);
-
-      /* Only clear once on the first subpass that uses the attachment */
-      bool needs_clear =
-         state->tile_aligned_render_area &&
-         state->job->first_subpass == attachment->first_subpass &&
-         attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
-         !state->job->is_subpass_continue;
+   bool use_per_buffer_clear = true;
 
-      /* Skip the last store if it is not required  */
-      bool needs_store =
-         state->subpass_idx < attachment->last_subpass ||
-         attachment->desc.storeOp == VK_ATTACHMENT_STORE_OP_STORE ||
-         needs_clear ||
-         !state->job->is_subpass_finish;
-
-      if (needs_store) {
-         cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
-                                           attachment_idx, layer,
-                                           RENDER_TARGET_0 + i,
-                                           needs_clear);
-         has_stores = true;
-      }
-   }
-
-   /* FIXME: separate stencil
-    *
-    * GFXH-1461/GFXH-1689: The per-buffer store command's clear
-    * buffer bit is broken for depth/stencil.  In addition, the
-    * clear packet's Z/S bit is broken, but the RTs bit ends up
-    * clearing Z/S.
-    *
-    * This means that when we implement depth/stencil clearing we
-    * need to emit a separate clear before we start the render pass,
-    * since the RTs bit is for clearing all render targets, and we might
-    * not want to do that. We might want to consider emitting clears for
-    * all RTs needing clearing just once ahead of the first subpass.
-    */
-   bool needs_depth_clear = false;
-   bool needs_stencil_clear = false;
+   /* FIXME: separate stencil */
    uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
    if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
       const struct v3dv_render_pass_attachment *ds_attachment =
@@ -1333,14 +1285,14 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
          vk_format_aspects(ds_attachment->desc.format);
 
       /* Only clear once on the first subpass that uses the attachment */
-      needs_depth_clear =
+      bool needs_depth_clear =
          (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
          state->tile_aligned_render_area &&
          state->job->first_subpass == ds_attachment->first_subpass &&
          ds_attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          !state->job->is_subpass_continue;
 
-      needs_stencil_clear =
+      bool needs_stencil_clear =
          (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
          state->tile_aligned_render_area &&
          state->job->first_subpass == ds_attachment->first_subpass &&
@@ -1362,13 +1314,63 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
           needs_stencil_clear ||
           !state->job->is_subpass_finish);
 
-      bool needs_ds_clear = needs_stencil_clear || needs_depth_clear;
+      /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
+       * buffer bit is broken for depth/stencil.  In addition, the
+       * clear packet's Z/S bit is broken, but the RTs bit ends up
+       * clearing Z/S.
+       *
+       * So if we have to emit a clear of depth or stencil we don't use
+       * per-buffer clears, not even for color, since we will have to emit
+       * a clear command for all tile buffers (including color) to handle
+       * the depth/stencil clears.
+       *
+       * Note that this bug is not reproduced in the simulator, where
+       * using the clear buffer bit in depth/stencil stores seems to work
+       * correctly.
+       */
+      use_per_buffer_clear = !needs_stencil_clear && !needs_depth_clear;
       bool needs_ds_store = needs_stencil_store || needs_depth_store;
       if (needs_ds_store) {
          uint32_t zs_buffer = v3dv_zs_buffer_from_aspect_bits(aspects);
          cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
                                            ds_attachment_idx, layer,
-                                           zs_buffer, needs_ds_clear);
+                                           zs_buffer, false);
+         has_stores = true;
+      }
+   }
+
+   for (uint32_t i = 0; i < subpass->color_count; i++) {
+      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
+
+      if (attachment_idx == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      const struct v3dv_render_pass_attachment *attachment =
+         &state->pass->attachments[attachment_idx];
+
+      assert(state->job->first_subpass >= attachment->first_subpass);
+      assert(state->subpass_idx >= attachment->first_subpass);
+      assert(state->subpass_idx <= attachment->last_subpass);
+
+      /* Only clear once on the first subpass that uses the attachment */
+      bool needs_clear =
+         state->tile_aligned_render_area &&
+         state->job->first_subpass == attachment->first_subpass &&
+         attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
+         !state->job->is_subpass_continue;
+
+      /* Skip the last store if it is not required  */
+      bool needs_store =
+         state->subpass_idx < attachment->last_subpass ||
+         attachment->desc.storeOp == VK_ATTACHMENT_STORE_OP_STORE ||
+         needs_clear ||
+         !state->job->is_subpass_finish;
+
+      if (needs_store) {
+         cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
+                                           attachment_idx, layer,
+                                           RENDER_TARGET_0 + i,
+                                           needs_clear && use_per_buffer_clear);
          has_stores = true;
       }
    }
@@ -1380,8 +1382,10 @@ cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
       }
    }
 
-   /* FIXME: see fixme remark for depth/stencil above */
-   if (needs_depth_clear && needs_stencil_clear) {
+   /* If we have any depth/stencil clears we can't use the per-buffer clear
+    * bit and instead we have to emit a single clear of all tile buffers.
+    */
+   if (!use_per_buffer_clear) {
       cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
          clear.clear_z_stencil_buffer = true;
          clear.clear_all_render_targets = true;