panfrost: Prepack partial RSD at compile time
author Alyssa Rosenzweig <alyssa@collabora.com>
Fri, 14 May 2021 22:00:45 +0000 (18:00 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 18 May 2021 22:51:56 +0000 (22:51 +0000)
Even for fragment shaders! Just need to merge the partial descriptors.

Fixes: c21c6d134bc ("panfrost: Use the pan_shader_prepare_rsd() helper")
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10869>

src/gallium/drivers/panfrost/pan_assemble.c
src/gallium/drivers/panfrost/pan_cmdstream.c
src/gallium/drivers/panfrost/pan_context.h

index d34cb79..79fbc96 100644 (file)
 #include "tgsi/tgsi_dump.h"
 
 static void
-pan_upload_shader_descriptor(struct panfrost_context *ctx,
-                        struct panfrost_shader_state *state)
+pan_prepare_shader_descriptor(struct panfrost_context *ctx,
+                              struct panfrost_shader_state *state,
+                              bool upload)
 {
         const struct panfrost_device *dev = pan_device(ctx->base.screen);
-        struct panfrost_ptr ptr =
-                panfrost_pool_alloc_desc(&ctx->descs, RENDERER_STATE);
+        struct mali_renderer_state_packed *out = &state->partial_rsd;
+
+        if (upload) {
+                struct panfrost_ptr ptr =
+                        panfrost_pool_alloc_desc(&ctx->descs, RENDERER_STATE);
 
-        state->state = pan_take_ref(&ctx->descs, ptr.gpu);
+                state->state = pan_take_ref(&ctx->descs, ptr.gpu);
+                out = ptr.cpu;
+        }
 
-        pan_pack(ptr.cpu, RENDERER_STATE, cfg) {
+        pan_pack(out, RENDERER_STATE, cfg) {
                 pan_shader_prepare_rsd(dev, &state->info, state->bin.gpu,
                                        &cfg);
         }
@@ -98,8 +104,8 @@ panfrost_shader_compile(struct panfrost_context *ctx,
                                 binary.data, binary.size, 128));
         }
 
-        if (stage != MESA_SHADER_FRAGMENT)
-                pan_upload_shader_descriptor(ctx, state);
+        pan_prepare_shader_descriptor(ctx, state, 
+                        stage != MESA_SHADER_FRAGMENT);
 
         util_dynarray_fini(&binary);
 
index c476358..049b83f 100644 (file)
@@ -460,7 +460,6 @@ static void
 panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
                                   struct MALI_RENDERER_STATE *state)
 {
-        const struct panfrost_device *dev = pan_device(ctx->base.screen);
         struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
         struct panfrost_blend_state *so = ctx->blend;
         bool alpha_to_coverage = so->base.alpha_to_coverage;
@@ -471,8 +470,6 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
                 state->properties.bifrost.allow_forward_pixel_to_be_killed = true;
                 state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         } else {
-                pan_shader_prepare_rsd(dev, &fs->info, fs->bin.gpu, state);
-
                 /* Track if any colour buffer is reused across draws, either
                  * from reading it directly, or from failing to write it */
                 bool blend_reads_dest = false;
@@ -517,8 +514,6 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
                 state->properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
                 state->properties.midgard.force_early_z = true;
         } else {
-                pan_shader_prepare_rsd(dev, &fs->info, fs->bin.gpu, state);
-
                 /* Reasons to disable early-Z from a shader perspective */
                 bool late_z = fs->info.fs.can_discard || fs->info.writes_global ||
                               fs->info.fs.writes_depth || fs->info.fs.writes_stencil ||
@@ -653,8 +648,15 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx,
                           mali_ptr *blend_shaders)
 {
         struct panfrost_device *dev = pan_device(ctx->base.screen);
+        struct panfrost_shader_state *fs =
+                panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
 
-        pan_pack(fragmeta, RENDERER_STATE, cfg) {
+        /* We need to merge several partial renderer state descriptors,
+         * so stage to temporary storage rather than reading back write-combine
+         * memory, which will trash performance. */
+        struct mali_renderer_state_packed rsd;
+
+        pan_pack(&rsd, RENDERER_STATE, cfg) {
                 panfrost_prepare_fs_state(ctx, blend_shaders, &cfg);
         }
 
@@ -664,8 +666,14 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx,
 
                 /* Word 14: SFBD Blend Equation */
                 STATIC_ASSERT(MALI_BLEND_EQUATION_LENGTH == 4);
-                fragmeta->opaque[14] = ctx->blend->equation[0].opaque[0];
+                rsd.opaque[14] = ctx->blend->equation[0].opaque[0];
         }
+
+        /* Merge with CSO state and upload */
+        if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer))
+                pan_merge(rsd, fs->partial_rsd, RENDERER_STATE);
+
+        memcpy(fragmeta, &rsd, sizeof(rsd));
 }
 
 mali_ptr
index 70075f7..5208c8d 100644 (file)
@@ -214,6 +214,9 @@ struct panfrost_shader_state {
         /* Respectively, shader binary and Renderer State Descriptor */
         struct pan_pool_ref bin, state;
 
+        /* For fragment shaders, a prepared (but not uploaded) RSD */
+        struct mali_renderer_state_packed partial_rsd;
+
         struct pan_shader_info info;
 
         struct pipe_stream_output_info stream_output;