From cdac7338745613448f8ccc151b4797522f12fe63 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 13 Dec 2022 14:10:29 -0500 Subject: [PATCH] radeonsi: rewrite si_update_ps_colorbuf0_slot to fix crashes and recursions I'm convinced that u_blitter interactions with fbfetch can't be handled in si_update_ps_colorbuf0_slot alone, so it has to be force-disabled by si_blitter_begin. Another reason why it has to be disabled for u_blitter and not ignored is because FBFETCH with MSAA enables sample shading regardless of context states, and we don't want that for u_blitter. Also, si_update_ps_colorbuf0_slot now disables FBFETCH explicitly before its own DCC and CMASK decompression because even though u_blitter can't do anything (due to blitter_running), si_blitter_end calls it too. The result is that no recursion can occur thanks to the blitter_running and suppress_update_ps_colorbuf0_slot flags, and FBFETCH is always force-disabled before those flags are set, which is the state we want to be in. Fixes: bc6d22b9200 ("radeonsi: fix ps_uses_fbfetch value") Acked-by: Yogesh Mohan Marimuthu Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: (cherry picked from commit 3632d398359cce1fb666b8cf6299ba624d3ccda7) --- .pick_status.json | 2 +- src/gallium/drivers/radeonsi/si_blit.c | 6 +++ src/gallium/drivers/radeonsi/si_compute_blit.c | 6 +++ src/gallium/drivers/radeonsi/si_descriptors.c | 67 ++++++++++++++++---------- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_state.h | 1 + 6 files changed, 56 insertions(+), 28 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index d0657f6..7084765 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -463,7 +463,7 @@ "description": "radeonsi: rewrite si_update_ps_colorbuf0_slot to fix crashes and recursions", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "bc6d22b92002dc43a8c742ab234717147d63ad87" }, diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 4158bb3..a6b85f7 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -85,6 +85,9 @@ void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op) si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); } + /* Force-disable fbfetch because there are unsolvable recursion problems with u_blitter. */ + si_force_disable_ps_colorbuf0_slot(sctx); + sctx->blitter_running = true; } @@ -112,6 +115,9 @@ void si_blitter_end(struct si_context *sctx) sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0; si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); + + /* We force-disabled fbfetch for u_blitter, so recompute the state. */ + si_update_ps_colorbuf0_slot(sctx); } static unsigned u_max_sample(struct pipe_resource *r) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 48594af..4b954a2 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -186,6 +186,9 @@ static void si_launch_grid_internal(struct si_context *sctx, const struct pipe_g if (!(flags & SI_OP_CS_RENDER_COND_ENABLE)) sctx->render_cond_enabled = false; + /* Force-disable fbfetch because there are unsolvable recursion problems. */ + si_force_disable_ps_colorbuf0_slot(sctx); + /* Skip decompression to prevent infinite recursion. */ sctx->blitter_running = true; @@ -201,6 +204,9 @@ static void si_launch_grid_internal(struct si_context *sctx, const struct pipe_g sctx->render_cond_enabled = sctx->render_cond; sctx->blitter_running = false; + /* We force-disabled fbfetch, so recompute the state. */ + si_update_ps_colorbuf0_slot(sctx); + if (flags & SI_OP_SYNC_AFTER) { sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index abf1c4d..a6d7703 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -939,54 +939,67 @@ static void si_images_update_needs_color_decompress_mask(struct si_images *image } } +void si_force_disable_ps_colorbuf0_slot(struct si_context *sctx) +{ + if (sctx->ps_uses_fbfetch) { + sctx->ps_uses_fbfetch = false; + si_update_ps_iter_samples(sctx); + } +} + void si_update_ps_colorbuf0_slot(struct si_context *sctx) { struct si_buffer_resources *buffers = &sctx->internal_bindings; struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; unsigned slot = SI_PS_IMAGE_COLORBUF0; struct pipe_surface *surf = NULL; + struct si_texture *tex = NULL; - /* si_texture_disable_dcc can get us here again. */ - if (sctx->in_update_ps_colorbuf0_slot || sctx->blitter_running) { - assert(!sctx->ps_uses_fbfetch || sctx->framebuffer.state.cbufs[0]); + /* FBFETCH is always disabled for u_blitter, and will be re-enabled after u_blitter is done. */ + if (sctx->blitter_running || sctx->suppress_update_ps_colorbuf0_slot) { + assert(!sctx->ps_uses_fbfetch); return; } - sctx->in_update_ps_colorbuf0_slot = true; - /* See whether FBFETCH is used and color buffer 0 is set. */ + /* Get the color buffer if FBFETCH should be enabled. */ if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.fs.uses_fbfetch_output && - sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0]) + sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0]) { surf = sctx->framebuffer.state.cbufs[0]; + if (surf) { + tex = (struct si_texture *)surf->texture; + assert(tex && !tex->is_depth); + } + } /* Return if FBFETCH transitions from disabled to disabled. */ - if (!buffers->buffers[slot] && !surf) { - assert(!sctx->ps_uses_fbfetch); - sctx->in_update_ps_colorbuf0_slot = false; + if (!sctx->ps_uses_fbfetch && !surf) return; - } - - sctx->ps_uses_fbfetch = surf != NULL; - si_update_ps_iter_samples(sctx); if (surf) { - struct si_texture *tex = (struct si_texture *)surf->texture; - struct pipe_image_view view = {0}; + bool disable_dcc = tex->surface.meta_offset != 0; + bool disable_cmask = tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer; - assert(tex); - assert(!tex->is_depth); - - /* Disable DCC, because the texture is used as both a sampler + /* Disable DCC and eliminate fast clear because the texture is used as both a sampler * and color buffer. */ - si_texture_disable_dcc(sctx, tex); + if (disable_dcc || disable_cmask) { + /* Disable fbfetch only for decompression. */ + si_force_disable_ps_colorbuf0_slot(sctx); + sctx->suppress_update_ps_colorbuf0_slot = true; + + si_texture_disable_dcc(sctx, tex); - if (tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer) { - /* Disable CMASK. */ - assert(tex->cmask_buffer != &tex->buffer); - si_eliminate_fast_color_clear(sctx, tex, NULL); - si_texture_discard_cmask(sctx->screen, tex); + if (disable_cmask) { + assert(tex->cmask_buffer != &tex->buffer); + si_eliminate_fast_color_clear(sctx, tex, NULL); + si_texture_discard_cmask(sctx->screen, tex); + } + + sctx->suppress_update_ps_colorbuf0_slot = false; } + /* Bind color buffer 0 as a shader image. */ + struct pipe_image_view view = {0}; view.resource = surf->texture; view.format = surf->format; view.access = PIPE_IMAGE_ACCESS_READ; @@ -1011,7 +1024,9 @@ void si_update_ps_colorbuf0_slot(struct si_context *sctx) } sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; - sctx->in_update_ps_colorbuf0_slot = false; + sctx->ps_uses_fbfetch = surf != NULL; + si_update_ps_iter_samples(sctx); + si_ps_key_update_framebuffer(sctx); } /* SAMPLER STATES */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 95e2521..a043f42 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1005,7 +1005,7 @@ struct si_context { uint16_t prefetch_L2_mask; bool blitter_running; - bool in_update_ps_colorbuf0_slot; + bool suppress_update_ps_colorbuf0_slot; bool is_noop:1; bool has_graphics:1; bool gfx_flush_in_progress : 1; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8e2eb8d..8d296de 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -490,6 +490,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */ bool is_stencil, uint16_t access, uint32_t * restrict state); void si_update_ps_colorbuf0_slot(struct si_context *sctx); +void si_force_disable_ps_colorbuf0_slot(struct si_context *sctx); void si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader); void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, struct pipe_constant_buffer *cbuf); -- 2.7.4