From 1085f74239f2b81e4e17ece4b9b7a805ee8dd250 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 21 May 2020 15:49:30 -0400 Subject: [PATCH] panfrost: Avoid redundant shader executions with mask=0x0 Only works for a few Midgard GPUs, but hey. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 8 +++++++ src/gallium/drivers/panfrost/pan_cmdstream.c | 36 ++++++++++++++++++++++++++++ src/gallium/drivers/panfrost/pan_context.h | 4 ++++ src/panfrost/include/panfrost-quirks.h | 7 +++++- 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 53bf9ca..f804e2b 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -196,6 +196,14 @@ panfrost_shader_compile(struct panfrost_context *ctx, state->writes_depth = true; if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) state->writes_stencil = true; + + /* List of reasons we need to execute frag shaders when things + * are masked off */ + + state->fs_sidefx = + s->info.writes_memory || + s->info.fs.uses_discard || + s->info.fs.uses_demote; break; case MESA_SHADER_COMPUTE: /* TODO: images */ diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index caf0166..59a1799 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -620,6 +620,27 @@ panfrost_frag_meta_zsa_update(struct panfrost_context *ctx, fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc)); } +static bool +panfrost_fs_required( + struct panfrost_shader_state *fs, + struct panfrost_blend_final *blend, + unsigned rt_count) +{ + /* If we generally have side effects */ + if (fs->fs_sidefx) + return true; + + /* If colour is written we need to execute */ + for (unsigned i = 0; i < rt_count; ++i) { + if (!blend[i].no_colour) + return true; + } + + /* If depth is written and not implied we need to execute. + * TODO: Predicate on Z/S writes being enabled */ + return (fs->writes_depth || fs->writes_stencil); +} + static void panfrost_frag_meta_blend_update(struct panfrost_context *ctx, struct mali_shader_meta *fragmeta, @@ -642,6 +663,21 @@ panfrost_frag_meta_blend_update(struct panfrost_context *ctx, blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset); + /* Disable shader execution if we can */ + if (dev->quirks & MIDGARD_SHADERLESS + && !panfrost_fs_required(fs, blend, rt_count)) { + fragmeta->shader = 0; + fragmeta->attribute_count = 0; + fragmeta->varying_count = 0; + fragmeta->texture_count = 0; + fragmeta->sampler_count = 0; + + /* This feature is not known to work on Bifrost */ + fragmeta->midgard1.work_count = 1; + fragmeta->midgard1.uniform_count = 0; + fragmeta->midgard1.uniform_buffer_count = 0; + } + /* If there is a blend shader, work registers are shared. We impose 8 * work registers as a limit for blend shaders. Should be lower XXX */ diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index c0a6d5f..a4ea44a 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -195,6 +195,10 @@ struct panfrost_shader_state { unsigned stack_size; unsigned shared_size; + /* Does the fragment shader have side effects? In particular, if output + * is masked out, is it legal to skip shader execution? */ + bool fs_sidefx; + /* For Bifrost - output type for each RT */ enum bifrost_shader_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT]; diff --git a/src/panfrost/include/panfrost-quirks.h b/src/panfrost/include/panfrost-quirks.h index e45191f..1d2ca77 100644 --- a/src/panfrost/include/panfrost-quirks.h +++ b/src/panfrost/include/panfrost-quirks.h @@ -50,6 +50,11 @@ /* What it says on the tin */ #define HAS_SWIZZLES (1 << 4) +/* Support for setting shader to NULL for masking out colour (while allowing + * Z/S updates to proceed) */ + +#define MIDGARD_SHADERLESS (1 << 5) + /* Quirk collections common to particular uarchs */ #define MIDGARD_QUIRKS (MIDGARD_BROKEN_FP16 | HAS_SWIZZLES) @@ -74,7 +79,7 @@ panfrost_get_quirks(unsigned gpu_id) case 0x750: case 0x860: case 0x880: - return MIDGARD_QUIRKS; + return MIDGARD_QUIRKS | MIDGARD_SHADERLESS; case 0x6000: /* G71 */ return BIFROST_QUIRKS | HAS_SWIZZLES; -- 2.7.4