panfrost: Move the blend shader cache at the device level
author Boris Brezillon <boris.brezillon@collabora.com>
Tue, 23 Mar 2021 11:17:03 +0000 (12:17 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 29 Mar 2021 06:53:50 +0000 (06:53 +0000)
So we can re-use it in the Vulkan driver.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9831>

src/panfrost/lib/pan_blend.c
src/panfrost/lib/pan_blend.h
src/panfrost/lib/pan_device.h

index 3f955b0..8e51fd1 100644 (file)
@@ -563,3 +563,164 @@ pan_blend_create_shader(const struct panfrost_device *dev,
 
         return b.shader;
 }
+
+/* Pack a Bifrost internal blend descriptor for render target `rt` using
+ * format `fmt`, and return it as a 64-bit value. The descriptor is always
+ * packed in opaque mode. When `force_size` is non-zero it replaces the bit
+ * size of the register type derived from the format; the base type is kept.
+ */
+uint64_t
+pan_blend_get_bifrost_desc(const struct panfrost_device *dev,
+                           enum pipe_format fmt, unsigned rt,
+                           unsigned force_size)
+{
+        const struct util_format_description *desc = util_format_description(fmt);
+        nir_alu_type reg_type = pan_unpacked_type_for_format(desc);
+        uint64_t packed;
+
+        /* Optional size override: same base type, caller-provided size. */
+        if (force_size)
+                reg_type = nir_alu_type_get_base_type(reg_type) | force_size;
+
+        pan_pack(&packed, BIFROST_INTERNAL_BLEND, cfg) {
+                cfg.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
+                cfg.fixed_function.rt = rt;
+                cfg.fixed_function.num_comps = desc->nr_channels;
+                cfg.fixed_function.conversion.memory_format =
+                         panfrost_format_to_bifrost_blend(dev, desc, true);
+
+                /* 8-bit integer types share the 16-bit register formats. */
+                switch (reg_type) {
+                case nir_type_float32:
+                        cfg.fixed_function.conversion.register_format =
+                                MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
+                        break;
+                case nir_type_float16:
+                        cfg.fixed_function.conversion.register_format =
+                                MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
+                        break;
+                case nir_type_int32:
+                        cfg.fixed_function.conversion.register_format =
+                                MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
+                        break;
+                case nir_type_int16:
+                case nir_type_int8:
+                        cfg.fixed_function.conversion.register_format =
+                                MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
+                        break;
+                case nir_type_uint32:
+                        cfg.fixed_function.conversion.register_format =
+                                MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
+                        break;
+                case nir_type_uint16:
+                case nir_type_uint8:
+                        cfg.fixed_function.conversion.register_format =
+                                MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
+                        break;
+                default:
+                        unreachable("Invalid format");
+                }
+        }
+
+        return packed;
+}
+
+/* Return the compiled blend shader variant for render target `rt` of `state`,
+ * compiling it on a cache miss.
+ *
+ * The cache is keyed on a pan_blend_shader_key derived from `state`; each
+ * cached shader holds up to PAN_BLEND_SHADER_MAX_VARIANTS variants that differ
+ * only by their blend constants. When the shader does not use constants, the
+ * first variant found is returned.
+ *
+ * The caller must hold dev->blend_shaders.lock across this call and for as
+ * long as it uses the returned variant, since a later call may recycle it.
+ */
+struct pan_blend_shader_variant *
+pan_blend_get_shader_locked(const struct panfrost_device *dev,
+                            const struct pan_blend_state *state,
+                            unsigned rt)
+{
+        /* The key is hashed and compared as raw bytes, so clear the whole
+         * object before filling it in: padding and unused bit-field bits must
+         * not hold indeterminate values.
+         * NOTE(review): assumes state->rts[rt].equation has no indeterminate
+         * padding of its own -- confirm against struct pan_blend_equation.
+         */
+        struct pan_blend_shader_key key;
+
+        memset(&key, 0, sizeof(key));
+        key.format = state->rts[rt].format;
+        key.rt = rt;
+        key.has_constants = pan_blend_constant_mask(state, rt) != 0;
+        key.logicop_enable = state->logicop_enable;
+        key.logicop_func = state->logicop_func;
+        key.nr_samples = state->rts[rt].nr_samples;
+        key.equation = state->rts[rt].equation;
+
+        struct hash_entry *he = _mesa_hash_table_search(dev->blend_shaders.shaders, &key);
+        struct pan_blend_shader *shader = he ? he->data : NULL;
+
+        if (!shader) {
+                shader = rzalloc(dev->blend_shaders.shaders, struct pan_blend_shader);
+                /* Byte copy so the stored key matches the lookup key exactly. */
+                memcpy(&shader->key, &key, sizeof(key));
+                list_inithead(&shader->variants);
+                _mesa_hash_table_insert(dev->blend_shaders.shaders, &shader->key, shader);
+        }
+
+        list_for_each_entry(struct pan_blend_shader_variant, iter,
+                            &shader->variants, node) {
+                if (!key.has_constants ||
+                    !memcmp(iter->constants, state->constants, sizeof(iter->constants))) {
+                        return iter;
+                }
+        }
+
+        struct pan_blend_shader_variant *variant = NULL;
+
+        if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) {
+                variant = rzalloc(shader, struct pan_blend_shader_variant);
+                util_dynarray_init(&variant->binary, variant);
+                list_add(&variant->node, &shader->variants);
+                shader->nvariants++;
+        } else {
+                /* Variant cap reached: recycle the entry at the tail of the
+                 * list and re-insert it with list_add().
+                 */
+                variant = list_last_entry(&shader->variants, struct pan_blend_shader_variant, node);
+                list_del(&variant->node);
+                list_add(&variant->node, &shader->variants);
+                util_dynarray_clear(&variant->binary);
+        }
+
+        /* Record the constants for fresh AND recycled variants. A recycled
+         * variant would otherwise keep its previous constants and later
+         * lookups would match it against the wrong values.
+         */
+        memcpy(variant->constants, state->constants, sizeof(variant->constants));
+
+        nir_shader *nir = pan_blend_create_shader(dev, state, rt);
+
+        /* Compile the NIR shader */
+        struct panfrost_compile_inputs inputs = {
+                .gpu_id = dev->gpu_id,
+                .is_blend = true,
+                .blend.rt = shader->key.rt,
+                .blend.nr_samples = key.nr_samples,
+                .rt_formats = { key.format },
+        };
+
+        if (key.has_constants)
+                memcpy(inputs.blend.constants, state->constants, sizeof(inputs.blend.constants));
+
+        if (pan_is_bifrost(dev)) {
+                inputs.blend.bifrost_blend_desc =
+                        pan_blend_get_bifrost_desc(dev, key.format, key.rt, 0);
+        }
+
+        struct pan_shader_info info;
+
+        pan_shader_compile(dev, nir, &inputs, &variant->binary, &info);
+
+        variant->work_reg_count = info.work_reg_count;
+        /* first_tag is only read from the Midgard part of the shader info. */
+        if (!pan_is_bifrost(dev))
+                variant->first_tag = info.midgard.first_tag;
+
+        ralloc_free(nir);
+
+        return variant;
+}
+
+/* Hash table callback: hash a pan_blend_shader_key over all of its bytes. */
+static uint32_t pan_blend_shader_key_hash(const void *key)
+{
+        const struct pan_blend_shader_key *shader_key = key;
+
+        return _mesa_hash_data(shader_key, sizeof(*shader_key));
+}
+
+/* Hash table callback: two keys are equal when every byte of the two
+ * pan_blend_shader_key objects matches.
+ */
+static bool pan_blend_shader_key_equal(const void *a, const void *b)
+{
+        const size_t key_size = sizeof(struct pan_blend_shader_key);
+
+        return memcmp(a, b, key_size) == 0;
+}
+
+/* Initialize the per-device blend shader cache: its lock and the hash table
+ * that maps pan_blend_shader_key to pan_blend_shader.
+ */
+void
+pan_blend_shaders_init(struct panfrost_device *dev)
+{
+        struct pan_blend_shaders *cache = &dev->blend_shaders;
+
+        pthread_mutex_init(&cache->lock, NULL);
+        cache->shaders = _mesa_hash_table_create(NULL,
+                                                 pan_blend_shader_key_hash,
+                                                 pan_blend_shader_key_equal);
+}
+
+/* Tear down the per-device blend shader cache set up by
+ * pan_blend_shaders_init(): destroy the hash table and the lock.
+ *
+ * Cached shaders are allocated with the hash table as their ralloc parent,
+ * so they are presumably released together with the table.
+ */
+void
+pan_blend_shaders_cleanup(struct panfrost_device *dev)
+{
+        _mesa_hash_table_destroy(dev->blend_shaders.shaders, NULL);
+        /* Do not leave a dangling pointer to the destroyed table behind. */
+        dev->blend_shaders.shaders = NULL;
+
+        /* Pairs with the pthread_mutex_init() done at init time. */
+        pthread_mutex_destroy(&dev->blend_shaders.lock);
+}
index d460c1e..6cd4dc6 100644 (file)
@@ -69,6 +69,32 @@ struct pan_blend_state {
         struct pan_blend_rt_state rts[8];
 };
 
+/* Key identifying a blend shader in the device-level cache.
+ *
+ * The cache hashes and compares this structure as raw bytes, so instances
+ * used for lookups should be fully zeroed (padding and unused bit-field bits
+ * included) before their fields are set.
+ */
+struct pan_blend_shader_key {
+        /* Format of the render target */
+        enum pipe_format format;
+        /* Render target index */
+        unsigned rt : 3;
+        /* Set when the blend state uses blend constants for this target */
+        unsigned has_constants : 1;
+        unsigned logicop_enable : 1;
+        unsigned logicop_func:4;
+        /* Sample count of the render target */
+        unsigned nr_samples : 5;
+        /* Blend equation of the render target */
+        struct pan_blend_equation equation;
+};
+
+/* One compiled version of a blend shader, specialized for a given set of
+ * blend constants.
+ */
+struct pan_blend_shader_variant {
+        /* Link in pan_blend_shader::variants */
+        struct list_head node;
+        /* Blend constants this variant is matched against on lookup */
+        float constants[4];
+        /* Output of the shader compilation */
+        struct util_dynarray binary;
+        /* Taken from the Midgard shader info; not set on Bifrost */
+        unsigned first_tag;
+        /* Work register count reported by the compiler */
+        unsigned work_reg_count;
+};
+
+/* Maximum number of variants kept per blend shader. Once reached, an
+ * existing variant is recycled instead of allocating a new one.
+ */
+#define PAN_BLEND_SHADER_MAX_VARIANTS 16
+
+/* Cache entry: a blend shader key and the variants compiled for it. */
+struct pan_blend_shader {
+        /* Key this entry is stored under in the cache hash table */
+        struct pan_blend_shader_key key;
+        /* Number of entries in the variants list */
+        unsigned nvariants;
+        /* List of struct pan_blend_shader_variant, linked through ::node */
+        struct list_head variants;
+};
+
 bool
 pan_blend_reads_dest(const struct pan_blend_state *state, unsigned rt);
 
@@ -101,4 +127,23 @@ pan_blend_create_shader(const struct panfrost_device *dev,
                         const struct pan_blend_state *state,
                         unsigned rt);
 
+/* Pack a Bifrost internal blend descriptor for render target rt with format
+ * fmt and return it as a 64-bit value. A non-zero force_size overrides the
+ * bit size of the register type derived from the format.
+ */
+uint64_t
+pan_blend_get_bifrost_desc(const struct panfrost_device *dev,
+                           enum pipe_format fmt, unsigned rt,
+                           unsigned force_size);
+
+/* Return the blend shader variant matching render target rt of state,
+ * compiling it if it is not in the device cache yet.
+ *
+ * Take dev->blend_shaders.lock before calling this function and release it
+ * only when you're done with the returned shader variant object.
+ */
+struct pan_blend_shader_variant *
+pan_blend_get_shader_locked(const struct panfrost_device *dev,
+                            const struct pan_blend_state *state,
+                            unsigned rt);
+
+/* Create the device-level blend shader cache (hash table and lock). */
+void
+pan_blend_shaders_init(struct panfrost_device *dev);
+
+/* Destroy the device-level blend shader cache created by
+ * pan_blend_shaders_init().
+ */
+void
+pan_blend_shaders_cleanup(struct panfrost_device *dev);
+
 #endif
index 7bfa563..ef8a5d4 100644 (file)
@@ -89,6 +89,11 @@ struct pan_blit_shaders {
         struct pan_blit_shader loads[PAN_BLIT_NUM_TARGETS][PAN_BLIT_NUM_TYPES][2];
 };
 
+/* Device-level blend shader cache. */
+struct pan_blend_shaders {
+        /* Maps struct pan_blend_shader_key to struct pan_blend_shader */
+        struct hash_table *shaders;
+        /* To be held while looking up shaders and using returned variants */
+        pthread_mutex_t lock;
+};
+
 typedef uint32_t mali_pixel_format;
 
 struct panfrost_format {
@@ -143,6 +148,7 @@ struct panfrost_device {
         } bo_cache;
 
         struct pan_blit_shaders blit_shaders;
+        struct pan_blend_shaders blend_shaders;
 
         /* Tiler heap shared across all tiler jobs, allocated against the
          * device since there's only a single tiler. Since this is invisible to