KHR-GL33.transform_feedback.query_vertex_interleaved_test,Fail
KHR-GL33.transform_feedback.query_vertex_separate_test,Fail
-# Fails with TU_DEBUG=forcebin
-# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12213
-dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image.4_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image_one_region.4_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_array_image.4_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_array_image_one_region.4_bit,Fail
-
# "Fail (createInstance returned VK_ERROR_INITIALIZATION_FAILED)"
# happens inside the loader on anholt's debian system, and there are various
# likely-looking fixes in later versions of the loader.
gmem_offset = att->gmem_offset_stencil;
}
- if (!gmem)
+ if (!gmem || !subpass->input_attachments[i / 2].patch_input_gmem)
continue;
/* patched for gmem */
cmd->state.cache.pending_flush_bits;
cmd->state.renderpass_cache.flush_bits = 0;
+ if (pass->subpasses[0].feedback_invalidate)
+ cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
+
/* Track LRZ valid state */
uint32_t a = cmd->state.subpass->depth_stencil_attachment.attachment;
if (a != VK_ATTACHMENT_UNUSED) {
/* Handle dependencies for the next subpass */
tu_subpass_barrier(cmd, &cmd->state.subpass->start_barrier, false);
+ if (cmd->state.subpass->feedback_invalidate)
+ cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
+
/* emit mrt/zs/msaa/ubwc state for the subpass that is starting */
tu6_emit_zs(cmd, cmd->state.subpass, cs);
tu6_emit_mrt(cmd, cmd->state.subpass, cs);
}
}
+/* If an input attachment is used without an intervening write to the same
+ * attachment, then we can just use the original image, even in GMEM mode.
+ * This is an optimization, but it's also important because it allows us to
+ * avoid having to invalidate UCHE at the beginning of each tile due to it
+ * becoming invalid. The only reads of GMEM via UCHE should be after an
+ * earlier subpass modified it, which only works if there's already an
+ * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
+ * don't consider this in the dependency code, so this is also required for
+ * correctness.
+ */
+static void
+tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
+{
+   /* Per-attachment flag: has this attachment been written (as color,
+    * resolve, or depth/stencil) by any subpass processed so far?
+    * NOTE(review): VLA assumes attachment_count > 0 — a zero-length VLA is
+    * undefined behavior in C; confirm callers never reach here with an
+    * attachment-less render pass.
+    */
+   bool written[pass->attachment_count];
+
+   memset(written, 0, sizeof(written));
+
+   for (unsigned i = 0; i < pass->subpass_count; i++) {
+      struct tu_subpass *subpass = &pass->subpasses[i];
+
+      /* An input attachment only needs to be read through GMEM if an earlier
+       * subpass wrote it; otherwise the original (sysmem) image is still
+       * current and can be sampled directly.
+       */
+      for (unsigned j = 0; j < subpass->input_count; j++) {
+         uint32_t a = subpass->input_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         subpass->input_attachments[j].patch_input_gmem = written[a];
+      }
+
+      for (unsigned j = 0; j < subpass->color_count; j++) {
+         uint32_t a = subpass->color_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         written[a] = true;
+
+         /* Same attachment used as both color and input in this subpass:
+          * a render feedback loop.
+          */
+         for (unsigned k = 0; k < subpass->input_count; k++) {
+            if (subpass->input_attachments[k].attachment == a &&
+                !subpass->input_attachments[k].patch_input_gmem) {
+               /* For render feedback loops, we have no idea whether the use
+                * as a color attachment or input attachment will come first,
+                * so we have to always use GMEM in case the color attachment
+                * comes first and defensively invalidate UCHE in case the
+                * input attachment comes first.
+                */
+               subpass->feedback_invalidate = true;
+               subpass->input_attachments[k].patch_input_gmem = true;
+            }
+         }
+      }
+
+      /* Resolve destinations count as writes for subsequent subpasses. */
+      for (unsigned j = 0; j < subpass->resolve_count; j++) {
+         uint32_t a = subpass->resolve_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         written[a] = true;
+      }
+
+      /* Depth/stencil writes get the same treatment as color, including the
+       * feedback-loop case where it is simultaneously an input attachment.
+       */
+      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+         written[subpass->depth_stencil_attachment.attachment] = true;
+         for (unsigned k = 0; k < subpass->input_count; k++) {
+            if (subpass->input_attachments[k].attachment ==
+                subpass->depth_stencil_attachment.attachment &&
+                !subpass->input_attachments[k].patch_input_gmem) {
+               subpass->feedback_invalidate = true;
+               subpass->input_attachments[k].patch_input_gmem = true;
+            }
+         }
+      }
+   }
+}
+
static void update_samples(struct tu_subpass *subpass,
VkSampleCountFlagBits samples)
{
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
uint32_t a = desc->pInputAttachments[j].attachment;
subpass->input_attachments[j].attachment = a;
- if (a != VK_ATTACHMENT_UNUSED)
- pass->attachments[a].gmem_offset = 0;
+ /* Note: attachments only used as input attachments will be read
+ * directly instead of through gmem, so we don't mark input
+ * attachments as needing gmem.
+ */
}
}
}
}
+ tu_render_pass_patch_input_gmem(pass);
+
/* disable unused attachments */
for (uint32_t i = 0; i < pass->attachment_count; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[i];