From c85ea824bcab971dc2d9052b5dc937ee4b139cf5 Mon Sep 17 00:00:00 2001 From: Felix DeGrood Date: Mon, 29 Mar 2021 17:11:42 -0700 Subject: [PATCH] iris: reduce redundant tile cache flushes We are flushing tile cache more often than is necessary. In unified cache mode, tile cache flushing is expensive, evicting all depth/pixel data from the L3$. This is only needed for a handful of cases, such as: making cpu or gpu changes globally visible (e.g. map), fast color clears, or slow depth clears. Tile cache flushing is a gen12+ feature. Remove blanket flushing of tile cache on all depth/RT flushes. Replace with selective tile cache flushing. Improves performance in several workloads: AztecRuins.ogl-high-offscreen-1440p 1% UnigineValley.ogl-g2 1% Dota 2 (replay Jul 2020).ogl-g2 1% Counter-Strike GO.ogl-g2 1% Manhattan.ogl-Off-19x10 2% CarChase.ogl-Off-19x10 1% Bioshock Infinite.ogl-g2 1% Reviewed-by: Kenneth Graunke Part-of: --- src/gallium/drivers/iris/iris_clear.c | 8 ++++++-- src/gallium/drivers/iris/iris_context.h | 1 + src/gallium/drivers/iris/iris_fine_fence.c | 1 + src/gallium/drivers/iris/iris_pipe_control.c | 1 + src/gallium/drivers/iris/iris_resolve.c | 1 + src/gallium/drivers/iris/iris_resource.c | 4 +++- src/gallium/drivers/iris/iris_state.c | 20 ++------------------ 7 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index 099474e..a2901d6 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -295,7 +295,8 @@ fast_clear_color(struct iris_context *ice, */ iris_emit_end_of_pipe_sync(batch, "fast clear: pre-flush", - PIPE_CONTROL_RENDER_TARGET_FLUSH); + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH); iris_batch_sync_region_start(batch); @@ -493,6 +494,8 @@ fast_clear_depth(struct iris_context *ice, ISL_AUX_OP_FULL_RESOLVE, false); iris_resource_set_aux_state(ice, res, res_level, layer, 1, ISL_AUX_STATE_RESOLVED); 
+ iris_emit_pipe_control_flush(batch, "hiz op: post depth resolve", + PIPE_CONTROL_TILE_CACHE_FLUSH); } } const union isl_color_value clear_value = { .f32 = {depth, } }; @@ -607,7 +610,8 @@ clear_depth_stencil(struct iris_context *ice, blorp_batch_finish(&blorp_batch); iris_batch_sync_region_end(batch); - iris_flush_and_dirty_for_history(ice, batch, res, 0, + iris_flush_and_dirty_for_history(ice, batch, res, + PIPE_CONTROL_TILE_CACHE_FLUSH, "cache history: post slow ZS clear"); if (clear_depth && z_res) { diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 865a82a..6403ef4 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -336,6 +336,7 @@ enum pipe_control_flags #define PIPE_CONTROL_CACHE_FLUSH_BITS \ (PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ PIPE_CONTROL_DATA_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ PIPE_CONTROL_RENDER_TARGET_FLUSH) #define PIPE_CONTROL_CACHE_INVALIDATE_BITS \ diff --git a/src/gallium/drivers/iris/iris_fine_fence.c b/src/gallium/drivers/iris/iris_fine_fence.c index a114013..0470389 100644 --- a/src/gallium/drivers/iris/iris_fine_fence.c +++ b/src/gallium/drivers/iris/iris_fine_fence.c @@ -66,6 +66,7 @@ iris_fine_fence_new(struct iris_batch *batch, unsigned flags) } else { pc = PIPE_CONTROL_WRITE_IMMEDIATE | PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH; } diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c index 9f7ac24..9768951 100644 --- a/src/gallium/drivers/iris/iris_pipe_control.c +++ b/src/gallium/drivers/iris/iris_pipe_control.c @@ -292,6 +292,7 @@ iris_flush_all_caches(struct iris_batch *batch) PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_INVALIDATE | 
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index 5a14b80..eaae971 100644 --- a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -366,6 +366,7 @@ iris_cache_flush_for_render(struct iris_batch *batch, iris_emit_pipe_control_flush(batch, "cache tracker: aux usage mismatch", PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_CS_STALL); entry->data = v_aux_usage; } diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index d309f90..f61c835 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -1540,6 +1540,7 @@ iris_map_copy_region(struct iris_transfer *map) iris_emit_pipe_control_flush(map->batch, "transfer read: flush before mapping", PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_CS_STALL); } @@ -2051,7 +2052,8 @@ iris_transfer_flush_region(struct pipe_context *ctx, if (res->base.b.target == PIPE_BUFFER) { if (map->staging) - history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH; + history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_TILE_CACHE_FLUSH; if (map->dest_had_defined_contents) history_flush |= iris_flush_bits_for_history(ice, res); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 9076199..5f88239 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -7616,23 +7616,6 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, flags |= PIPE_CONTROL_CS_STALL; } - if (GFX_VER >= 12 && ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) || - (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))) { - /* From the PIPE_CONTROL instruction table, bit 28 (Tile Cache Flush - * Enable): - * - * Unified Cache (Tile Cache Disabled): - * - * When the Color and Depth (Z) streams are enabled to be cached in - * 
the DC space of L2, Software must use "Render Target Cache Flush - * Enable" and "Depth Cache Flush Enable" along with "Tile Cache - * Flush" for getting the color and depth (Z) write data to be - * globally observable. In this mode of operation it is not required - * to set "CS Stall" upon setting "Tile Cache Flush" bit. - */ - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - } - if (GFX_VER == 9 && devinfo->gt == 4) { /* TODO: The big Skylake GT4 post sync op workaround */ } @@ -7737,7 +7720,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) { fprintf(stderr, - " PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n", + " PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n", (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "", (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "", (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "", @@ -7747,6 +7730,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "", (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "", (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "", + (flags & PIPE_CONTROL_TILE_CACHE_FLUSH) ? "Tile " : "", (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "", (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "", (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "", -- 2.7.4