From: Kenneth Graunke Date: Wed, 19 Jun 2019 21:04:50 +0000 (-0500) Subject: iris: Implement INTEL_DEBUG=pc for pipe control logging. X-Git-Tag: upstream/19.3.0~5254 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d4a4384b315a4b74357b30f868f4d1c25a571083;p=platform%2Fupstream%2Fmesa.git iris: Implement INTEL_DEBUG=pc for pipe control logging. This prints a log of every PIPE_CONTROL flush we emit, noting which bits were set, and also the reason for the flush. That way we can see which are caused by hardware workarounds, render-to-texture, buffer updates, and so on. It should make it easier to determine whether we're doing too many flushes and why. --- diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index cb44994..6007849 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -616,7 +616,8 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) iris_finish_batch(batch); - if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) { + if (unlikely(INTEL_DEBUG & + (DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL))) { int bytes_for_commands = iris_batch_bytes_used(batch); int second_bytes = 0; if (batch->bo != batch->exec_bos[0]) { @@ -630,12 +631,15 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) 100.0f * bytes_for_commands / BATCH_SZ, batch->exec_count, (float) batch->aperture_space / (1024 * 1024)); - dump_fence_list(batch); - dump_validation_list(batch); - } - if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { - decode_batch(batch); + if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT)) { + dump_fence_list(batch); + dump_validation_list(batch); + } + + if (INTEL_DEBUG & DEBUG_BATCH) { + decode_batch(batch); + } } int ret = submit_batch(batch); diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c index 6f57dc8..47c2c96 100644 --- a/src/gallium/drivers/iris/iris_blit.c +++ 
b/src/gallium/drivers/iris/iris_blit.c @@ -289,8 +289,12 @@ tex_cache_flush_hack(struct iris_batch *batch) * * TODO: Remove this hack! */ - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL); - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); + const char *reason = + "workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads"; + + iris_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL); + iris_emit_pipe_control_flush(batch, reason, + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); } /** @@ -488,7 +492,8 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) info->dst.box.depth, dst_aux_usage); iris_flush_and_dirty_for_history(ice, batch, (struct iris_resource *) - info->dst.resource); + info->dst.resource, + "cache history: post-blit"); } static void @@ -569,7 +574,8 @@ iris_copy_region(struct blorp_context *blorp, blorp_batch_finish(&blorp_batch); iris_flush_and_dirty_for_history(ice, batch, - (struct iris_resource *) dst); + (struct iris_resource *) dst, + "cache history: post copy_region"); } else { // XXX: what about one surface being a buffer and not the other? diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 895bfb7..209940e 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -219,8 +219,10 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch, } if (need_invalidate) { - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_CS_STALL); + iris_emit_pipe_control_flush(batch, + "workaround: VF cache 32-bit key [blorp]", + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_CS_STALL); } } @@ -279,6 +281,7 @@ iris_blorp_exec(struct blorp_batch *blorp_batch, * be set in this packet." 
*/ iris_emit_pipe_control_flush(batch, + "workaround: RT BTI change [blorp]", PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_STALL_AT_SCOREBOARD); #endif diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index 45030eb..2334807 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -249,7 +249,9 @@ fast_clear_color(struct iris_context *ice, * and again afterwards to ensure that the resolve is complete before we * do any more regular drawing. */ - iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_emit_end_of_pipe_sync(batch, + "fast clear: pre-flush", + PIPE_CONTROL_RENDER_TARGET_FLUSH); /* If we reach this point, we need to fast clear to change the state to * ISL_AUX_STATE_CLEAR, or to update the fast clear color (or both). @@ -274,7 +276,9 @@ fast_clear_color(struct iris_context *ice, box->x, box->y, box->x + box->width, box->y + box->height); blorp_batch_finish(&blorp_batch); - iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_emit_end_of_pipe_sync(batch, + "fast clear: post flush", + PIPE_CONTROL_RENDER_TARGET_FLUSH); iris_resource_set_aux_state(ice, res, level, box->z, box->depth, ISL_AUX_STATE_CLEAR); @@ -344,7 +348,8 @@ clear_color(struct iris_context *ice, color, color_write_disable); blorp_batch_finish(&blorp_batch); - iris_flush_and_dirty_for_history(ice, batch, res); + iris_flush_and_dirty_for_history(ice, batch, res, + "cache history: post color clear"); iris_resource_finish_render(ice, res, level, box->z, box->depth, aux_usage); @@ -510,7 +515,8 @@ clear_depth_stencil(struct iris_context *ice, if (z_res && clear_depth && can_fast_clear_depth(ice, z_res, level, box, depth)) { fast_clear_depth(ice, z_res, level, box, depth); - iris_flush_and_dirty_for_history(ice, batch, res); + iris_flush_and_dirty_for_history(ice, batch, res, + "cache history: post fast Z clear"); clear_depth = false; z_res = false; } @@ -546,7 +552,8 
@@ clear_depth_stencil(struct iris_context *ice, clear_stencil && stencil_res ? 0xff : 0, stencil); blorp_batch_finish(&blorp_batch); - iris_flush_and_dirty_for_history(ice, batch, res); + iris_flush_and_dirty_for_history(ice, batch, res, + "cache history: post slow ZS clear"); if (z_res) { iris_resource_finish_depth(ice, z_res, level, diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index e319ecb..dbaec56 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -455,7 +455,8 @@ struct iris_vtable { struct iris_bo *dst_bo, uint32_t dst_offset, struct iris_bo *src_bo, uint32_t src_offset, unsigned bytes); - void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags, + void (*emit_raw_pipe_control)(struct iris_batch *batch, + const char *reason, uint32_t flags, struct iris_bo *bo, uint32_t offset, uint64_t imm); @@ -771,12 +772,13 @@ void iris_launch_grid(struct pipe_context *, const struct pipe_grid_info *); /* iris_pipe_control.c */ void iris_emit_pipe_control_flush(struct iris_batch *batch, - uint32_t flags); -void iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags, + const char *reason, uint32_t flags); +void iris_emit_pipe_control_write(struct iris_batch *batch, + const char *reason, uint32_t flags, struct iris_bo *bo, uint32_t offset, uint64_t imm); void iris_emit_end_of_pipe_sync(struct iris_batch *batch, - uint32_t flags); + const char *reason, uint32_t flags); void iris_init_flush_functions(struct pipe_context *ctx); diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c index f686ef7..65879b4 100644 --- a/src/gallium/drivers/iris/iris_pipe_control.c +++ b/src/gallium/drivers/iris/iris_pipe_control.c @@ -55,7 +55,9 @@ * given generation. 
*/ void -iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags) +iris_emit_pipe_control_flush(struct iris_batch *batch, + const char *reason, + uint32_t flags) { if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { @@ -70,11 +72,12 @@ iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags) * with any write cache flush, so this shouldn't be a concern. In order * to ensure a full stall, we do an end-of-pipe sync. */ - iris_emit_end_of_pipe_sync(batch, flags & PIPE_CONTROL_CACHE_FLUSH_BITS); + iris_emit_end_of_pipe_sync(batch, reason, + flags & PIPE_CONTROL_CACHE_FLUSH_BITS); flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); } - batch->vtbl->emit_raw_pipe_control(batch, flags, NULL, 0, 0); + batch->vtbl->emit_raw_pipe_control(batch, reason, flags, NULL, 0, 0); } /** @@ -86,11 +89,12 @@ iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags) * - PIPE_CONTROL_WRITE_DEPTH_COUNT */ void -iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags, +iris_emit_pipe_control_write(struct iris_batch *batch, + const char *reason, uint32_t flags, struct iris_bo *bo, uint32_t offset, uint64_t imm) { - batch->vtbl->emit_raw_pipe_control(batch, flags, bo, offset, imm); + batch->vtbl->emit_raw_pipe_control(batch, reason, flags, bo, offset, imm); } /* @@ -116,7 +120,8 @@ iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags, * Data" in the PIPE_CONTROL command. 
*/ void -iris_emit_end_of_pipe_sync(struct iris_batch *batch, uint32_t flags) +iris_emit_end_of_pipe_sync(struct iris_batch *batch, + const char *reason, uint32_t flags) { /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": * @@ -140,7 +145,8 @@ iris_emit_end_of_pipe_sync(struct iris_batch *batch, uint32_t flags) * Data, Required Write Cache Flush bits set) * - Workload-2 (Can use the data produce or output by Workload-1) */ - iris_emit_pipe_control_write(batch, flags | PIPE_CONTROL_CS_STALL | + iris_emit_pipe_control_write(batch, reason, + flags | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE, batch->screen->workaround_bo, 0, 0); } @@ -156,17 +162,21 @@ iris_texture_barrier(struct pipe_context *ctx, unsigned flags) render_batch->cache.render->entries || render_batch->cache.depth->entries) { iris_emit_pipe_control_flush(render_batch, + "API: texture barrier (1/2)", PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_CS_STALL); iris_emit_pipe_control_flush(render_batch, + "API: texture barrier (2/2)", PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); } if (compute_batch->contains_draw) { iris_emit_pipe_control_flush(compute_batch, + "API: texture barrier (1/2)", PIPE_CONTROL_CS_STALL); iris_emit_pipe_control_flush(compute_batch, + "API: texture barrier (2/2)", PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); } } @@ -195,8 +205,10 @@ iris_memory_barrier(struct pipe_context *ctx, unsigned flags) for (int i = 0; i < IRIS_BATCH_COUNT; i++) { if (ice->batches[i].contains_draw || - ice->batches[i].cache.render->entries) - iris_emit_pipe_control_flush(&ice->batches[i], bits); + ice->batches[i].cache.render->entries) { + iris_emit_pipe_control_flush(&ice->batches[i], "API: memory barrier", + bits); + } } } diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c index d30011f..1a230b3 100644 --- a/src/gallium/drivers/iris/iris_query.c +++ b/src/gallium/drivers/iris/iris_query.c @@ -157,7 +157,8 @@ 
mark_available(struct iris_context *ice, struct iris_query *q) } else { /* Order available *after* the query results. */ flags |= PIPE_CONTROL_FLUSH_ENABLE; - iris_emit_pipe_control_write(batch, flags, bo, offset, true); + iris_emit_pipe_control_write(batch, "query: mark available", + flags, bo, offset, true); } } @@ -175,7 +176,8 @@ iris_pipelined_write(struct iris_batch *batch, devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0; struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res); - iris_emit_pipe_control_write(batch, flags | optional_cs_stall, + iris_emit_pipe_control_write(batch, "query: pipelined snapshot write", + flags | optional_cs_stall, bo, offset, 0ull); } @@ -188,6 +190,7 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset) if (!iris_is_query_pipelined(q)) { iris_emit_pipe_control_flush(batch, + "query: non-pipelined snapshot write", PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); q->stalled = true; @@ -202,7 +205,10 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset) * bit set prior to programming a PIPE_CONTROL with Write PS Depth * Count sync operation." */ - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL); + iris_emit_pipe_control_flush(batch, + "workaround: depth stall before writing " + "PS_DEPTH_COUNT", + PIPE_CONTROL_DEPTH_STALL); } iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q, PIPE_CONTROL_WRITE_DEPTH_COUNT | @@ -260,6 +266,7 @@ write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end) uint32_t offset = q->query_state_ref.offset; iris_emit_pipe_control_flush(batch, + "query: write SO overflow snapshots", PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); for (uint32_t i = 0; i < count; i++) { @@ -942,7 +949,9 @@ iris_get_query_result_resource(struct pipe_context *ctx, * and use the result. */ // XXX: Why? i965 doesn't do this. 
- iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL); + iris_emit_pipe_control_flush(batch, + "query: unknown QBO flushing hack", + PIPE_CONTROL_CS_STALL); return; } @@ -1015,7 +1024,9 @@ set_predicate_for_result(struct iris_context *ice, ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT; /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */ - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE); + iris_emit_pipe_control_flush(batch, + "conditional rendering: set predicate", + PIPE_CONTROL_FLUSH_ENABLE); q->stalled = true; switch (q->type) { diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index ac2676c..d80b126 100644 --- a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -339,11 +339,13 @@ void iris_flush_depth_and_render_caches(struct iris_batch *batch) { iris_emit_pipe_control_flush(batch, + "cache tracker: render-to-texture", PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_CS_STALL); iris_emit_pipe_control_flush(batch, + "cache tracker: render-to-texture", PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE); @@ -465,7 +467,8 @@ iris_resolve_color(struct iris_context *ice, * and again afterwards to ensure that the resolve is complete before we * do any more regular drawing. 
*/ - iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush", + PIPE_CONTROL_RENDER_TARGET_FLUSH); struct blorp_batch blorp_batch; blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); @@ -475,7 +478,8 @@ iris_resolve_color(struct iris_context *ice, blorp_batch_finish(&blorp_batch); /* See comment above */ - iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush", + PIPE_CONTROL_RENDER_TARGET_FLUSH); } static void @@ -622,10 +626,12 @@ iris_hiz_exec(struct iris_context *ice, * another for depth stall. */ iris_emit_pipe_control_flush(batch, + "hiz op: pre-flushes (1/2)", PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_CS_STALL); - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL); + iris_emit_pipe_control_flush(batch, "hiz op: pre-flushes (2/2)", + PIPE_CONTROL_DEPTH_STALL); assert(res->aux.usage == ISL_AUX_USAGE_HIZ && res->aux.bo); @@ -659,6 +665,7 @@ iris_hiz_exec(struct iris_context *ice, * TODO: Such as the spec says, this could be conditional. */ iris_emit_pipe_control_flush(batch, + "hiz op: post flush", PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DEPTH_STALL); } diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index 3d4bfd6..c696b09 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -1029,6 +1029,7 @@ iris_map_copy_region(struct iris_transfer *map) xfer->resource, xfer->level, box); /* Ensure writes to the staging BO land before we map it below. 
*/ iris_emit_pipe_control_flush(map->batch, + "transfer read: flush before mapping", PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_CS_STALL); } @@ -1475,7 +1476,8 @@ iris_transfer_flush_region(struct pipe_context *ctx, if (ice->batches[i].contains_draw || ice->batches[i].cache.render->entries) { iris_batch_maybe_flush(&ice->batches[i], 24); - iris_flush_and_dirty_for_history(ice, &ice->batches[i], res); + iris_flush_and_dirty_for_history(ice, &ice->batches[i], res, + "cache history: transfer flush"); } } @@ -1559,7 +1561,8 @@ iris_flush_bits_for_history(struct iris_resource *res) void iris_flush_and_dirty_for_history(struct iris_context *ice, struct iris_batch *batch, - struct iris_resource *res) + struct iris_resource *res, + const char *reason) { if (res->base.target != PIPE_BUFFER) return; @@ -1572,7 +1575,7 @@ iris_flush_and_dirty_for_history(struct iris_context *ice, if (batch->name != IRIS_BATCH_COMPUTE) flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH; - iris_emit_pipe_control_flush(batch, flush); + iris_emit_pipe_control_flush(batch, reason, flush); } bool diff --git a/src/gallium/drivers/iris/iris_resource.h b/src/gallium/drivers/iris/iris_resource.h index 79b1505..419122c 100644 --- a/src/gallium/drivers/iris/iris_resource.h +++ b/src/gallium/drivers/iris/iris_resource.h @@ -267,7 +267,8 @@ uint32_t iris_flush_bits_for_history(struct iris_resource *res); void iris_flush_and_dirty_for_history(struct iris_context *ice, struct iris_batch *batch, - struct iris_resource *res); + struct iris_resource *res, + const char *reason); unsigned iris_get_num_logical_layers(const struct iris_resource *res, unsigned level); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 23f8d29..0984ae8 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -486,6 +486,7 @@ flush_for_state_base_change(struct iris_batch *batch) * rendering. It's a bit of a big hammer but it appears to work. 
*/ iris_emit_end_of_pipe_sync(batch, + "change STATE_BASE_ADDRESS", PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH); @@ -539,12 +540,14 @@ emit_pipeline_select(struct iris_batch *batch, uint32_t pipeline) * MI_PIPELINE_SELECT command to change the Pipeline Select Mode." */ iris_emit_pipe_control_flush(batch, + "workaround: PIPELINE_SELECT flushes (1/2)", PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL); iris_emit_pipe_control_flush(batch, + "workaround: PIPELINE_SELECT flushes (2/2)", PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | PIPE_CONTROL_STATE_CACHE_INVALIDATE | @@ -663,7 +666,9 @@ iris_enable_obj_preemption(struct iris_batch *batch, bool enable) uint32_t reg_val; /* A fixed function pipe flush is required before modifying this field */ - iris_emit_end_of_pipe_sync(batch, PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_emit_end_of_pipe_sync(batch, enable ? "enable preemption" + : "disable preemption", + PIPE_CONTROL_RENDER_TARGET_FLUSH); /* enable object level preemption */ iris_pack_state(GENX(CS_CHICKEN1), &reg_val, reg) { @@ -2570,6 +2575,7 @@ iris_set_framebuffer_state(struct pipe_context *ctx, */ // XXX: does this need to happen at 3DSTATE_BTP_PS time? 
iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER], + "workaround: RT BTI change [draw]", PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_STALL_AT_SCOREBOARD); #endif @@ -3048,7 +3054,8 @@ iris_set_stream_output_targets(struct pipe_context *ctx, iris_dirty_for_history(ice, res); } } - iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER], flush); + iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER], + "make streamout results visible", flush); } } @@ -5131,8 +5138,11 @@ iris_upload_dirty_render_state(struct iris_context *ice, } } - if (flush_flags) - iris_emit_pipe_control_flush(batch, flush_flags); + if (flush_flags) { + iris_emit_pipe_control_flush(batch, + "workaround: VF cache 32-bit key [VB]", + flush_flags); + } const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); @@ -5331,8 +5341,10 @@ iris_upload_render_state(struct iris_context *ice, /* The VF cache key only uses 32-bits, see vertex buffer comment above */ uint16_t high_bits = bo->gtt_offset >> 32ull; if (high_bits != ice->state.last_index_bo_high_bits) { - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_CS_STALL); + iris_emit_pipe_control_flush(batch, + "workaround: VF cache 32-bit key [IB]", + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_CS_STALL); ice->state.last_index_bo_high_bits = high_bits; } } @@ -5353,7 +5365,9 @@ iris_upload_render_state(struct iris_context *ice, unsigned draw_count_offset = draw->indirect->indirect_draw_count_offset; - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE); + iris_emit_pipe_control_flush(batch, + "ensure indirect draw buffer is flushed", + PIPE_CONTROL_FLUSH_ENABLE); if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) { static const uint32_t math[] = { @@ -5459,7 +5473,9 @@ iris_upload_render_state(struct iris_context *ice, (void *) draw->count_from_stream_output; /* XXX: Replace with actual cache tracking */ - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL); 
+ iris_emit_pipe_control_flush(batch, + "draw count from stream output stall", + PIPE_CONTROL_CS_STALL); iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { lrm.RegisterAddress = CS_GPR(0); @@ -5548,7 +5564,9 @@ iris_upload_compute_state(struct iris_context *ice, * these scoreboard related states, a MEDIA_STATE_FLUSH is * sufficient." */ - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL); + iris_emit_pipe_control_flush(batch, + "workaround: stall before MEDIA_VFE_STATE", + PIPE_CONTROL_CS_STALL); iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) { if (prog_data->total_scratch) { @@ -6030,8 +6048,12 @@ get_post_sync_flags(enum pipe_control_flags flags) * iris_pipe_control.c instead, which may split the pipe control further. */ static void -iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, - struct iris_bo *bo, uint32_t offset, uint64_t imm) +iris_emit_raw_pipe_control(struct iris_batch *batch, + const char *reason, + uint32_t flags, + struct iris_bo *bo, + uint32_t offset, + uint64_t imm) { UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo; enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); @@ -6056,7 +6078,9 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, * needs to be sent prior to the PIPE_CONTROL with VF Cache * Invalidation Enable set to a 1." */ - iris_emit_raw_pipe_control(batch, 0, NULL, 0, 0); + iris_emit_raw_pipe_control(batch, + "workaround: recursive VF cache invalidate", + 0, NULL, 0, 0); } if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(batch) && post_sync_flags) { @@ -6069,7 +6093,9 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, * * The same text exists a few rows below for Post Sync Op. 
*/ - iris_emit_raw_pipe_control(batch, PIPE_CONTROL_CS_STALL, bo, offset, imm); + iris_emit_raw_pipe_control(batch, + "workaround: CS stall before gpgpu post-sync", + PIPE_CONTROL_CS_STALL, bo, offset, imm); } if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { @@ -6078,8 +6104,9 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, * another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12) * = 0 and Pipe Control Flush Enable (bit 7) = 1" */ - iris_emit_raw_pipe_control(batch, PIPE_CONTROL_FLUSH_ENABLE, bo, - offset, imm); + iris_emit_raw_pipe_control(batch, + "workaround: PC flush before RT flush", + PIPE_CONTROL_FLUSH_ENABLE, bo, offset, imm); } /* "Flush Types" workarounds --------------------------------------------- @@ -6359,6 +6386,34 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, /* Emit --------------------------------------------------------------- */ + if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) { + fprintf(stderr, + " PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n", + (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "", + (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "", + (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "", + (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "", + (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "", + (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "", + (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "", + (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "", + (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "", + (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "", + (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "", + (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "", + (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "", + (flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "", + (flags & PIPE_CONTROL_NOTIFY_ENABLE) ? 
"Notify " : "", + (flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ? + "SnapRes" : "", + (flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ? + "ISPDis" : "", + (flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "", + (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "", + (flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? "WriteTimestamp " : "", + imm, reason); + } + iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) { pc.LRIPostSyncOperation = NoLRIOperation; pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; diff --git a/src/intel/dev/gen_debug.c b/src/intel/dev/gen_debug.c index dd58e6b..a482328 100644 --- a/src/intel/dev/gen_debug.c +++ b/src/intel/dev/gen_debug.c @@ -88,6 +88,7 @@ static const struct debug_control debug_control[] = { { "soft64", DEBUG_SOFT64 }, { "tcs8", DEBUG_TCS_EIGHT_PATCH }, { "bt", DEBUG_BT }, + { "pc", DEBUG_PIPE_CONTROL }, { NULL, 0 } }; diff --git a/src/intel/dev/gen_debug.h b/src/intel/dev/gen_debug.h index 0776114..edd3f8a 100644 --- a/src/intel/dev/gen_debug.h +++ b/src/intel/dev/gen_debug.h @@ -86,6 +86,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_SOFT64 (1ull << 42) #define DEBUG_TCS_EIGHT_PATCH (1ull << 43) #define DEBUG_BT (1ull << 44) +#define DEBUG_PIPE_CONTROL (1ull << 45) /* These flags are not compatible with the disk shader cache */ #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME