freedreno/ir3: Improve shader key normalization.
author Eric Anholt <eric@anholt.net>
Tue, 14 Apr 2020 23:34:00 +0000 (16:34 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 1 May 2020 16:26:32 +0000 (16:26 +0000)
We can remove a bunch of conditional code at key comparison time by
computing a bitmask of used key bits at ir3_shader creation time.  This
also gives us a nice place to put additional key simplification to reduce
how many variants we create (like skipping rasterflat if we don't read
colors in the FS, or skipping vclamp_color if we don't write colors).

It does mean walking the whole key to AND it, but the key is just 28 bytes
so far so that seems pretty fine.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4562>

src/compiler/shader_enums.h
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/ir3/ir3_gallium.c

index 5606289..b33a917 100644 (file)
@@ -329,6 +329,10 @@ const char *gl_varying_slot_name(gl_varying_slot slot);
 #define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
 #define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
 #define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
+/* Union of the COL0, COL1, BFC0 and BFC1 varying bits. */
+#define VARYING_BITS_COLOR (VARYING_BIT_COL0 | \
+                            VARYING_BIT_COL1 |        \
+                            VARYING_BIT_BFC0 |        \
+                            VARYING_BIT_BFC1)
 #define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
 #define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
 #define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
index 676c90e..e8e95fc 100644 (file)
@@ -289,6 +289,62 @@ ir3_shader_destroy(struct ir3_shader *shader)
        free(shader);
 }
 
+/**
+ * Creates a bitmask of the used bits of the shader key by this particular
+ * shader and stores it in shader->key_mask.  ir3_key_clear_unused() ANDs
+ * keys with that mask to skip state-dependent recompiles when possible.
+ */
+static void
+ir3_setup_used_key(struct ir3_shader *shader)
+{
+       nir_shader *nir = shader->nir;
+       struct shader_info *info = &nir->info;
+       struct ir3_shader_key *key = &shader->key_mask;
+
+       /* This key flag is just used to make for a cheaper ir3_shader_key_equal
+        * check in the common case.
+        */
+       key->has_per_samp = true;
+
+       if (info->stage == MESA_SHADER_FRAGMENT) {
+               key->fsaturate_s = ~0;
+               key->fsaturate_t = ~0;
+               key->fsaturate_r = ~0;
+               key->fastc_srgb = ~0;
+               key->fsamples = ~0;
+
+               if (info->inputs_read & VARYING_BITS_COLOR) {
+                       key->rasterflat = true;
+                       key->color_two_side = true;
+               }
+
+               /* FRAG_RESULT_* are slot numbers, not bits: convert before masking. */
+               if ((info->outputs_written & ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+                               BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
+                               BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))) != 0) {
+                       key->fclamp_color = true;
+               }
+
+               /* Only used for deciding on behavior of
+                * nir_intrinsic_load_barycentric_sample. */
+               key->msaa = info->fs.uses_sample_qualifier;
+       } else {
+               key->tessellation = ~0;
+               key->has_gs = true;
+
+               if (info->outputs_written & VARYING_BITS_COLOR)
+                       key->vclamp_color = true;
+
+               if (info->stage == MESA_SHADER_VERTEX) {
+                       key->vsaturate_s = ~0;
+                       key->vsaturate_t = ~0;
+                       key->vsaturate_r = ~0;
+                       key->vastc_srgb = ~0;
+                       key->vsamples = ~0;
+               }
+       }
+}
+
 struct ir3_shader *
 ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
                struct ir3_stream_output_info *stream_output)
@@ -336,6 +392,8 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
                nir_print_shader(shader->nir, stdout);
        }
 
+       ir3_setup_used_key(shader);
+
        return shader;
 }
 
index 3b9b3f2..ecb3948 100644 (file)
@@ -364,62 +364,6 @@ ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *las
        return false;
 }
 
-/* clears shader-key flags which don't apply to the given shader
- * stage
- */
-static inline void
-ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type)
-{
-       switch (type) {
-       case MESA_SHADER_FRAGMENT:
-               if (key->has_per_samp) {
-                       key->vsaturate_s = 0;
-                       key->vsaturate_t = 0;
-                       key->vsaturate_r = 0;
-                       key->vastc_srgb = 0;
-                       key->vsamples = 0;
-                       key->has_gs = false; /* FS doesn't care */
-                       key->tessellation = IR3_TESS_NONE;
-               }
-               break;
-       case MESA_SHADER_VERTEX:
-       case MESA_SHADER_GEOMETRY:
-               key->color_two_side = false;
-               key->rasterflat = false;
-               if (key->has_per_samp) {
-                       key->fsaturate_s = 0;
-                       key->fsaturate_t = 0;
-                       key->fsaturate_r = 0;
-                       key->fastc_srgb = 0;
-                       key->fsamples = 0;
-               }
-
-               /* VS and GS only care about whether or not we're tessellating. */
-               key->tessellation = !!key->tessellation;
-               break;
-       case MESA_SHADER_TESS_CTRL:
-       case MESA_SHADER_TESS_EVAL:
-               key->color_two_side = false;
-               key->rasterflat = false;
-               if (key->has_per_samp) {
-                       key->fsaturate_s = 0;
-                       key->fsaturate_t = 0;
-                       key->fsaturate_r = 0;
-                       key->fastc_srgb = 0;
-                       key->fsamples = 0;
-                       key->vsaturate_s = 0;
-                       key->vsaturate_t = 0;
-                       key->vsaturate_r = 0;
-                       key->vastc_srgb = 0;
-                       key->vsamples = 0;
-               }
-               break;
-       default:
-               /* TODO */
-               break;
-       }
-}
-
 /**
  * On a4xx+a5xx, Images share state with textures and SSBOs:
  *
@@ -674,6 +618,11 @@ struct ir3_shader {
 
        /* Map from driver_location to byte offset in per-primitive storage */
        unsigned output_loc[32];
+
+       /* Bitmask of bits of the shader key used by this shader.  Used to avoid
+        * recompiles for GL NOS that doesn't actually apply to the shader.
+        */
+       struct ir3_shader_key key_mask;
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
@@ -692,6 +641,18 @@ ir3_glsl_type_size(const struct glsl_type *type, bool bindless);
  * Helper/util:
  */
 
+/* Zeroes every bit of @key that is not set in shader->key_mask, walking
+ * both structs as arrays of 32-bit words. */
+static inline void
+ir3_key_clear_unused(struct ir3_shader_key *key, struct ir3_shader *shader)
+{
+       const uint32_t *used = (const uint32_t *)&shader->key_mask;
+       uint32_t *dwords = (uint32_t *)key;
+       STATIC_ASSERT(sizeof(*key) % 4 == 0);
+       for (unsigned dw = 0; dw < sizeof(*key) / 4; dw++)
+               dwords[dw] &= used[dw];
+}
+
 static inline int
 ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot)
 {
index 6e31fa0..2f7a016 100644 (file)
@@ -77,11 +77,11 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
        struct ir3_shader_variant *v;
        bool created = false;
 
-       /* some shader key values only apply to vertex or frag shader,
-        * so normalize the key to avoid constructing multiple identical
-        * variants:
+       /* Some shader key values may not be used by a given ir3_shader (for
+        * example, fragment shader saturates in the vertex shader), so clean out
+        * those flags to avoid recompiling.
         */
-       ir3_normalize_key(&key, shader->type);
+       ir3_key_clear_unused(&key, shader);
 
        v = ir3_shader_get_variant(shader, &key, binning_pass, &created);