panfrost: Precompile shaders
author Alyssa Rosenzweig <alyssa@collabora.com>
Fri, 14 Oct 2022 21:14:46 +0000 (17:14 -0400)
committer Marge Bot <emma+marge@anholt.net>
Wed, 2 Nov 2022 16:52:11 +0000 (16:52 +0000)
We have no vertex shader key, and unless legacy GL features are used, the
fragment shader key is known ahead-of-time. That means we can precompile shaders
at CSO create time, hopefully avoiding some draw-time jank.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19363>

src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_screen.c
src/gallium/drivers/panfrost/pan_shader.c
src/panfrost/lib/pan_util.h

index 65a78b6..3356fbc 100644 (file)
@@ -260,8 +260,8 @@ struct pan_linkage {
  * shaders with varying emulated features baked in
  */
 struct panfrost_fs_key {
-        /* Number of colour buffers */
-        unsigned nr_cbufs;
+        /* Number of colour buffers if gl_FragColor is written */
+        unsigned nr_cbufs_for_fragcolor;
 
         /* On Valhall, fixed_varying_mask of the linked vertex shader */
         uint32_t fixed_varying_mask;
index 7cba67a..bdf41dc 100644 (file)
@@ -59,7 +59,6 @@ static const struct debug_named_value panfrost_debug_options[] = {
         {"deqp",      PAN_DBG_DEQP,     "Hacks for dEQP"},
         {"dirty",     PAN_DBG_DIRTY,    "Always re-emit all state"},
         {"sync",      PAN_DBG_SYNC,     "Wait for each job's completion and abort on GPU faults"},
-        {"precompile", PAN_DBG_PRECOMPILE, "Precompile shaders for shader-db"},
         {"nofp16",     PAN_DBG_NOFP16,     "Disable 16-bit support"},
         {"gl3",       PAN_DBG_GL3,      "Enable experimental GL 3.x implementation, up to 3.3"},
         {"noafbc",    PAN_DBG_NO_AFBC,  "Disable AFBC support"},
index af4a53b..26f3acf 100644 (file)
@@ -78,7 +78,10 @@ panfrost_shader_compile(struct pipe_screen *pscreen,
         if (s->info.stage == MESA_SHADER_FRAGMENT) {
                 inputs.fixed_varying_mask = state->key.fs.fixed_varying_mask;
 
-                NIR_PASS_V(s, nir_lower_fragcolor, state->key.fs.nr_cbufs);
+                if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
+                        NIR_PASS_V(s, nir_lower_fragcolor,
+                                   state->key.fs.nr_cbufs_for_fragcolor);
+                }
 
                 if (state->key.fs.sprite_coord_enable) {
                         NIR_PASS_V(s, nir_lower_texcoord_replace,
@@ -146,7 +149,10 @@ panfrost_build_key(struct panfrost_context *ctx,
         struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer;
         struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
 
-        key->fs.nr_cbufs = fb->nr_cbufs;
+        /* gl_FragColor lowering needs the number of colour buffers */
+        if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
+                key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
+        }
 
         /* Point sprite lowering needed on Bifrost and newer */
         if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) {
@@ -328,7 +334,6 @@ panfrost_create_shader_state(
         const struct pipe_shader_state *cso)
 {
         struct panfrost_uncompiled_shader *so = panfrost_alloc_shader();
-        struct panfrost_device *dev = pan_device(pctx->screen);
 
         so->stream_output = cso->stream_output;
 
@@ -366,16 +371,32 @@ panfrost_create_shader_state(
                 so->nir->info.has_transform_feedback_varyings = false;
         }
 
-        /* Precompile for shader-db if we need to */
-        if (unlikely(dev->debug & PAN_DBG_PRECOMPILE)) {
-                struct panfrost_compiled_shader state = { 0 };
+        /* Compile the program. We don't use vertex shader keys, so there will
+         * be no further vertex shader variants. We do have fragment shader
+         * keys, but we can still compile with a default key that will work most
+         * of the time.
+         */
+        struct panfrost_shader_key key = { 0 };
 
-                panfrost_shader_compile(pctx->screen,
-                                        &ctx->shaders, &ctx->descs,
-                                        so->nir, dbg, &state, 0,
-                                        so->fixed_varying_mask);
+        /* gl_FragColor lowering needs the number of colour buffers on desktop
+         * GL, where it acts as an implicit broadcast to all colour buffers.
+         *
+         * However, gl_FragColor is a legacy feature, so assume that if
+         * gl_FragColor is used, there is only a single render target. The
+         * implicit broadcast is neither especially useful nor required by GLES.
+         */
+        if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
+            so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
+
+                key.fs.nr_cbufs_for_fragcolor = 1;
         }
 
+        /* Creating a CSO is single-threaded, so it's ok to use the
+         * locked function without explicitly taking the lock. Creating a
+         * default variant acts as a precompile.
+         */
+        panfrost_new_variant_locked(ctx, so, &key);
+
         return so;
 }
 
index 3962890..c2f8837 100644 (file)
@@ -37,7 +37,7 @@
 #define PAN_DBG_DEQP            0x0004
 #define PAN_DBG_DIRTY           0x0008
 #define PAN_DBG_SYNC            0x0010
-#define PAN_DBG_PRECOMPILE      0x0020
+/* 0x20 unused */
 #define PAN_DBG_NOFP16          0x0040
 #define PAN_DBG_NO_CRC          0x0080
 #define PAN_DBG_GL3             0x0100