From 87149cc545afdacb339a933d47ded5c1adf8f429 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 8 Jun 2023 08:52:28 +0300 Subject: [PATCH] blorp: update and move fast clear PIPE_CONTROLs to drivers Before this patch, when updating the indirect clear color, BLORP only invalidated the texture cache on gfx11. The hardware docs state that the texture cache invalidation is also needed on gfx12 however. Add this invalidation for gfx12 and move the fast-clear related cache invalidations to the drivers for clarity and performance. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5850 Reviewed-by: Nanley Chery Part-of: --- src/gallium/drivers/iris/iris_clear.c | 76 ++++++++++++++++++++--- src/intel/blorp/blorp_genX_exec.h | 31 ---------- src/intel/vulkan/anv_blorp.c | 112 ++++++++++++++++++++++++++++++++++ 3 files changed, 178 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index d9eb087..f5ade3b 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -254,16 +254,72 @@ fast_clear_color(struct iris_context *ice, * and again afterwards to ensure that the resolve is complete before we * do any more regular drawing. */ - iris_emit_end_of_pipe_sync(batch, - "fast clear: pre-flush", - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_TILE_CACHE_FLUSH | - (devinfo->verx10 == 120 ? - PIPE_CONTROL_DEPTH_STALL : 0) | - (devinfo->verx10 == 125 ? - PIPE_CONTROL_FLUSH_HDC | - PIPE_CONTROL_DATA_CACHE_FLUSH : 0) | - PIPE_CONTROL_PSS_STALL_SYNC); + enum pipe_control_flags pc_flags = + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH | + (devinfo->verx10 == 120 ? PIPE_CONTROL_DEPTH_STALL : 0) | + (devinfo->verx10 == 125 ? 
PIPE_CONTROL_FLUSH_HDC | + PIPE_CONTROL_DATA_CACHE_FLUSH : 0) | + PIPE_CONTROL_PSS_STALL_SYNC; + + /* From the ICL PRMs, Volume 9: Render Engine, State Caching : + * + * "Any values referenced by pointers within the RENDER_SURFACE_STATE or + * SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or Indirect + * State Pointer) are considered to be part of that state and any + * changes to these referenced values requires an invalidation of the + * L1 state cache to ensure the new values are being used as part of + * the state. In the case of surface data pointed to by the Surface + * Base Address in RENDER SURFACE STATE, the Texture Cache must be + * invalidated if the surface data changes." + * + * and From the Render Target Fast Clear section, + * + * "HwManaged FastClear allows SW to store FastClearValue in separate + * graphics allocation, instead of keeping them in RENDER_SURFACE_STATE. + * This behavior can be enabled by setting ClearValueAddressEnable in + * RENDER_SURFACE_STATE. + * + * Proper sequence of commands is as follows: + * + * 1. Storing clear color to allocation. + * 2. Ensuring that step 1. is finished and visible for TextureCache. + * 3. Performing FastClear. + * + * Step 2. is required on products with ClearColorConversion feature. + * This feature is enabled by setting ClearColorConversionEnable. This + * causes HW to read stored color from ClearColorAllocation and write + * back with the native format or RenderTarget - and clear color needs + * to be present and visible. Reading is done from TextureCache, writing + * is done to RenderCache." + * + * We're going to change the clear color. Invalidate the texture cache now + * to ensure the clear color conversion feature works properly. Although + * the docs seem to require invalidating the texture cache after updating + * the clear color allocation, we can do this beforehand so long as we + * ensure: + * + * 1. Step 1 is complete before the texture cache is accessed in step 3. + * 2. 
We don't access the texture cache between invalidation and step 3. + * + * The second requirement is satisfied because we'll be performing step 1 + * and 3 right after invalidating. The first is satisfied because BLORP + * updates the clear color before performing the fast clear and it performs + * the synchronizations suggested by the Render Target Fast Clear section + * (not quoted here) to ensure its completion. + * + * While we're here, also invalidate the state cache as suggested. + * + * Due to a corruption reported in + * https://gitlab.freedesktop.org/mesa/mesa/-/issues/8853#note_2015707 when + * the clear color doesn't change, we invalidate both caches always. + */ + if (devinfo->ver >= 11) { + pc_flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + } + + iris_emit_pipe_control_flush(batch, "fast clear: pre-flush", pc_flags); iris_batch_sync_region_start(batch); diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index fe87261..5161c47 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1645,29 +1645,6 @@ blorp_setup_binding_table(struct blorp_batch *batch, } } -#if GFX_VER >= 7 && GFX_VER < 12 - if (has_indirect_clear_color) { - /* Updating a surface state object may require that the state cache be - * invalidated. From the SKL PRM, Shared Functions -> State -> State - * Caching: - * - * Whenever the RENDER_SURFACE_STATE object in memory pointed to by - * the Binding Table Pointer (BTP) and Binding Table Index (BTI) is - * modified [...], the L1 state cache must be invalidated to ensure - * the new surface or sampler state is fetched from system memory. - * - * XXX - Investigate why exactly this invalidation is necessary to - * avoid Vulkan regressions on ICL. It's possible that the - * MI_ATOMIC used to update the clear color isn't correctly - * ordered with the pre-existing invalidation in - * blorp_update_clear_color(). 
- */ - blorp_emit(batch, GENX(PIPE_CONTROL), pipe) { - pipe.StateCacheInvalidationEnable = true; - } - } -#endif - return bind_offset; } @@ -1916,10 +1893,6 @@ blorp_update_clear_color(UNUSED struct blorp_batch *batch, { assert(info->clear_color_addr.buffer != NULL); #if GFX_VER == 11 - blorp_emit(batch, GENX(PIPE_CONTROL), pipe) { - pipe.CommandStreamerStallEnable = true; - } - /* 2 QWORDS */ const unsigned inlinedata_dw = 2 * 2; const unsigned num_dwords = GENX(MI_ATOMIC_length) + inlinedata_dw; @@ -1950,10 +1923,6 @@ blorp_update_clear_color(UNUSED struct blorp_batch *batch, dw[4] = info->clear_color.u32[3]; dw[5] = 0; - blorp_emit(batch, GENX(PIPE_CONTROL), pipe) { - pipe.StateCacheInvalidationEnable = true; - pipe.TextureCacheInvalidationEnable = true; - } #elif GFX_VER >= 9 /* According to Wa_2201730850, in the Clear Color Programming Note under diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 8b78e6c..c583300 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1248,6 +1248,62 @@ exec_ccs_op(struct anv_cmd_buffer *cmd_buffer, switch (ccs_op) { case ISL_AUX_OP_FAST_CLEAR: + /* From the ICL PRMs, Volume 9: Render Engine, State Caching : + * + * "Any values referenced by pointers within the RENDER_SURFACE_STATE + * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or + * Indirect State Pointer) are considered to be part of that state + * and any changes to these referenced values requires an + * invalidation of the L1 state cache to ensure the new values are + * being used as part of the state. In the case of surface data + * pointed to by the Surface Base Address in RENDER SURFACE STATE, + * the Texture Cache must be invalidated if the surface data + * changes." + * + * and From the Render Target Fast Clear section, + * + * "HwManaged FastClear allows SW to store FastClearValue in separate + * graphics allocation, instead of keeping them in + * RENDER_SURFACE_STATE. 
This behavior can be enabled by setting + * ClearValueAddressEnable in RENDER_SURFACE_STATE. + * + * Proper sequence of commands is as follows: + * + * 1. Storing clear color to allocation + * 2. Ensuring that step 1. is finished and visible for TextureCache + * 3. Performing FastClear + * + * Step 2. is required on products with ClearColorConversion feature. + * This feature is enabled by setting ClearColorConversionEnable. + * This causes HW to read stored color from ClearColorAllocation and + * write back with the native format or RenderTarget - and clear + * color needs to be present and visible. Reading is done from + * TextureCache, writing is done to RenderCache." + * + * We're going to change the clear color. Invalidate the texture cache + * now to ensure the clear color conversion feature works properly. + * Although the docs seem to require invalidating the texture cache + * after updating the clear color allocation, we can do this beforehand + * so long as we ensure: + * + * 1. Step 1 is complete before the texture cache is accessed in step 3 + * 2. We don't access the texture cache between invalidation and step 3 + * + * The second requirement is satisfied because we'll be performing step + * 1 and 3 right after invalidating. The first is satisfied because + * BLORP updates the clear color before performing the fast clear and it + * performs the synchronizations suggested by the Render Target Fast + * Clear section (not quoted here) to ensure its completion. + * + * While we're here, also invalidate the state cache as suggested. 
+ */ + if (devinfo->ver >= 11) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, + "before blorp clear color update"); + } + blorp_fast_clear(batch, &surf, format, swizzle, level, base_layer, layer_count, 0, 0, level_width, level_height); @@ -1346,6 +1402,62 @@ exec_mcs_op(struct anv_cmd_buffer *cmd_buffer, switch (mcs_op) { case ISL_AUX_OP_FAST_CLEAR: + /* From the ICL PRMs, Volume 9: Render Engine, State Caching : + * + * "Any values referenced by pointers within the RENDER_SURFACE_STATE + * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or + * Indirect State Pointer) are considered to be part of that state + * and any changes to these referenced values requires an + * invalidation of the L1 state cache to ensure the new values are + * being used as part of the state. In the case of surface data + * pointed to by the Surface Base Address in RENDER SURFACE STATE, + * the Texture Cache must be invalidated if the surface data + * changes." + * + * and From the Render Target Fast Clear section, + * + * "HwManaged FastClear allows SW to store FastClearValue in separate + * graphics allocation, instead of keeping them in + * RENDER_SURFACE_STATE. This behavior can be enabled by setting + * ClearValueAddressEnable in RENDER_SURFACE_STATE. + * + * Proper sequence of commands is as follows: + * + * 1. Storing clear color to allocation + * 2. Ensuring that step 1. is finished and visible for TextureCache + * 3. Performing FastClear + * + * Step 2. is required on products with ClearColorConversion feature. + * This feature is enabled by setting ClearColorConversionEnable. + * This causes HW to read stored color from ClearColorAllocation and + * write back with the native format or RenderTarget - and clear + * color needs to be present and visible. Reading is done from + * TextureCache, writing is done to RenderCache." + * + * We're going to change the clear color. 
Invalidate the texture cache + * now to ensure the clear color conversion feature works properly. + * Although the docs seem to require invalidating the texture cache + * after updating the clear color allocation, we can do this beforehand + * so long as we ensure: + * + * 1. Step 1 is complete before the texture cache is accessed in step 3 + * 2. We don't access the texture cache between invalidation and step 3 + * + * The second requirement is satisfied because we'll be performing step + * 1 and 3 right after invalidating. The first is satisfied because + * BLORP updates the clear color before performing the fast clear and it + * performs the synchronizations suggested by the Render Target Fast + * Clear section (not quoted here) to ensure its completion. + * + * While we're here, also invalidate the state cache as suggested. + */ + if (devinfo->ver >= 11) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, + "before blorp clear color update"); + } + blorp_fast_clear(batch, &surf, format, swizzle, 0, base_layer, layer_count, 0, 0, image->vk.extent.width, image->vk.extent.height); -- 2.7.4