tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
+ tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
+ A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
+
if (cmd->state.predication_active) {
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
tu_cs_emit(cs, 0);
unsigned layers = MAX2(fb->layers, util_logbase2(subpass->multiview_mask) + 1);
tu_cs_emit_regs(cs, A6XX_GRAS_MAX_LAYER_INDEX(layers - 1));
+
+ tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
+ A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
+
+ /* If there is a feedback loop, then the shader can read the previous value
+ * of a pixel being written out. It can also write some components and then
+ * read different components without a barrier in between. This is a
+ * problem in sysmem mode with UBWC, because the main buffer and flags
+ * buffer can get out-of-sync if only one is flushed. We fix this by
+ * setting the SINGLE_PRIM_MODE field to the same value that the blob does
+ * for advanced_blend in sysmem mode if a feedback loop is detected.
+ */
+ if (subpass->feedback) {
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
+ tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
+ A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
+ A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(
+ FLUSH_PER_OVERLAP_AND_OVERWRITE));
+ tu_cond_exec_end(cs);
+ }
}
void
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 0);
- tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
- A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
}
}
+/* Returns true when attachment index `a` names a real attachment (i.e. it is
+ * not VK_ATTACHMENT_UNUSED) and also appears among the input attachments of
+ * `subpass`.
+ */
+static bool
+subpass_reads_attachment_as_input(const struct tu_subpass *subpass, uint32_t a)
+{
+   if (a == VK_ATTACHMENT_UNUSED)
+      return false;
+
+   for (unsigned idx = 0; idx < subpass->input_count; idx++) {
+      if (subpass->input_attachments[idx].attachment == a)
+         return true;
+   }
+
+   return false;
+}
+
+/* Walks every subpass of `pass` and sets subpass->feedback to true when one
+ * of its color attachments, or its depth/stencil attachment, is also listed
+ * as an input attachment of that same subpass.
+ *
+ * The flag is only ever raised here, never cleared: a subpass without such an
+ * overlap keeps whatever value `feedback` already had.
+ */
+static void
+tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
+{
+   for (unsigned s = 0; s < pass->subpass_count; s++) {
+      struct tu_subpass *subpass = &pass->subpasses[s];
+      bool overlaps = false;
+
+      /* Color attachments first; stop scanning at the first overlap. */
+      for (unsigned c = 0; !overlaps && c < subpass->color_count; c++) {
+         overlaps = subpass_reads_attachment_as_input(
+            subpass, subpass->color_attachments[c].attachment);
+      }
+
+      /* The depth/stencil attachment only matters if no color attachment
+       * already established the overlap.
+       */
+      if (!overlaps) {
+         overlaps = subpass_reads_attachment_as_input(
+            subpass, subpass->depth_stencil_attachment.attachment);
+      }
+
+      if (overlaps)
+         subpass->feedback = true;
+   }
+}
+
static void update_samples(struct tu_subpass *subpass,
VkSampleCountFlagBits samples)
{
tu_render_pass_patch_input_gmem(pass);
+ tu_render_pass_check_feedback_loop(pass);
+
/* disable unused attachments */
for (uint32_t i = 0; i < pass->attachment_count; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[i];