KHR-GL33.transform_feedback.query_vertex_interleaved_test,Fail
KHR-GL33.transform_feedback.query_vertex_separate_test,Fail
-# Fails with TU_DEBUG=forcebin
-# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12213
-dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image.4_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image_one_region.4_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_array_image.4_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_array_image_one_region.4_bit,Fail
-
# "Fail (createInstance returned VK_ERROR_INITIALIZATION_FAILED)"
# happens inside the loader on anholt's debian system, and there are various
# likely-looking fixes in later versions of the loader.
gmem_offset = att->gmem_offset_stencil;
}
- if (!gmem)
+ if (!gmem || !subpass->input_attachments[i / 2].patch_input_gmem)
continue;
/* patched for gmem */
cmd->state.cache.pending_flush_bits;
cmd->state.renderpass_cache.flush_bits = 0;
+ if (pass->subpasses[0].feedback_invalidate)
+ cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
+
/* Track LRZ valid state */
uint32_t a = cmd->state.subpass->depth_stencil_attachment.attachment;
if (a != VK_ATTACHMENT_UNUSED) {
/* Handle dependencies for the next subpass */
tu_subpass_barrier(cmd, &cmd->state.subpass->start_barrier, false);
+ if (cmd->state.subpass->feedback_invalidate)
+ cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
+
/* emit mrt/zs/msaa/ubwc state for the subpass that is starting */
tu6_emit_zs(cmd, cmd->state.subpass, cs);
tu6_emit_mrt(cmd, cmd->state.subpass, cs);
}
}
+/* If an input attachment is used without an intervening write to the same
+ * attachment, then we can just use the original image, even in GMEM mode.
+ * This is an optimization, but it's also important because it allows us to
+ * avoid having to invalidate UCHE at the beginning of each tile due to it
+ * becoming invalid. The only reads of GMEM via UCHE should be after an
+ * earlier subpass modified it, which only works if there's already an
+ * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
+ * don't consider this in the dependency code, so this is also required for
+ * correctness.
+ */
+static void
+tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
+{
+   /* Per-attachment flag: has this attachment been written (as color,
+    * resolve, or depth/stencil) by any subpass processed so far?
+    * NOTE(review): VLA assumes attachment_count > 0 — a zero-length VLA is
+    * undefined behavior in C; confirm callers never reach here with an
+    * attachment-less render pass.
+    */
+   bool written[pass->attachment_count];
+
+   memset(written, 0, sizeof(written));
+
+   for (unsigned i = 0; i < pass->subpass_count; i++) {
+      struct tu_subpass *subpass = &pass->subpasses[i];
+
+      /* An input attachment only needs to be read through GMEM if an earlier
+       * subpass wrote it; otherwise the original (sysmem) image is still
+       * current and can be sampled directly.
+       */
+      for (unsigned j = 0; j < subpass->input_count; j++) {
+         uint32_t a = subpass->input_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         subpass->input_attachments[j].patch_input_gmem = written[a];
+      }
+
+      for (unsigned j = 0; j < subpass->color_count; j++) {
+         uint32_t a = subpass->color_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         written[a] = true;
+
+         /* Same attachment used as both color and input in this subpass:
+          * a render feedback loop.
+          */
+         for (unsigned k = 0; k < subpass->input_count; k++) {
+            if (subpass->input_attachments[k].attachment == a &&
+                !subpass->input_attachments[k].patch_input_gmem) {
+               /* For render feedback loops, we have no idea whether the use
+                * as a color attachment or input attachment will come first,
+                * so we have to always use GMEM in case the color attachment
+                * comes first and defensively invalidate UCHE in case the
+                * input attachment comes first.
+                */
+               subpass->feedback_invalidate = true;
+               subpass->input_attachments[k].patch_input_gmem = true;
+            }
+         }
+      }
+
+      /* Resolve destinations count as writes for subsequent subpasses. */
+      for (unsigned j = 0; j < subpass->resolve_count; j++) {
+         uint32_t a = subpass->resolve_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         written[a] = true;
+      }
+
+      /* Depth/stencil writes get the same treatment as color, including the
+       * feedback-loop case where it is simultaneously an input attachment.
+       */
+      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+         written[subpass->depth_stencil_attachment.attachment] = true;
+         for (unsigned k = 0; k < subpass->input_count; k++) {
+            if (subpass->input_attachments[k].attachment ==
+                subpass->depth_stencil_attachment.attachment &&
+                !subpass->input_attachments[k].patch_input_gmem) {
+               subpass->feedback_invalidate = true;
+               subpass->input_attachments[k].patch_input_gmem = true;
+            }
+         }
+      }
+   }
+}
+
static void update_samples(struct tu_subpass *subpass,
VkSampleCountFlagBits samples)
{
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
uint32_t a = desc->pInputAttachments[j].attachment;
subpass->input_attachments[j].attachment = a;
- if (a != VK_ATTACHMENT_UNUSED)
- pass->attachments[a].gmem_offset = 0;
+ /* Note: attachments only used as input attachments will be read
+ * directly instead of through gmem, so we don't mark input
+ * attachments as needing gmem.
+ */
}
}
}
}
+ tu_render_pass_patch_input_gmem(pass);
+
/* disable unused attachments */
for (uint32_t i = 0; i < pass->attachment_count; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[i];