From: Alyssa Rosenzweig <alyssa@collabora.com>
Date: Tue, 6 Jul 2021 22:07:26 +0000 (-0400)
Subject: panfrost: Move launch_grid to pan_cmdstream
X-Git-Tag: upstream/21.2.3~758
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fa2d70aad8964619595eb118ab3aaa325d2ec7a2;p=platform%2Fupstream%2Fmesa.git

panfrost: Move launch_grid to pan_cmdstream

Same issues apply as for draw_vbo.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11745>
---

diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 965108a..2c3b849 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -43,6 +43,7 @@
 #include "pan_texture.h"
 #include "pan_util.h"
 #include "pan_indirect_draw.h"
+#include "pan_indirect_dispatch.h"
 
 /* Statically assert that PIPE_* enums match the hardware enums.
  * (As long as they match, we don't need to translate them.)
@@ -3133,6 +3134,130 @@ panfrost_draw_vbo(struct pipe_context *pipe,
 
 }
 
+/* Launch grid is the compute equivalent of draw_vbo: build a COMPUTE job for
+ * the context's compute shader from the grid info and add it to the batch.
+ * All batches are flushed both on entry and after the job has been added.
+ */
+static void
+panfrost_launch_grid(struct pipe_context *pipe,
+                const struct pipe_grid_info *info)
+{
+        struct panfrost_context *ctx = pan_context(pipe);
+        struct panfrost_device *dev = pan_device(pipe->screen);
+
+        /* XXX - shouldn't be necessary with working memory barriers. Affected
+         * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */
+        panfrost_flush_all_batches(ctx);
+
+        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
+        struct panfrost_shader_state *cs =
+                &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];
+
+        /* Indirect dispatch can't handle workgroup local storage since that
+         * would require dynamic memory allocation. Bail in this case.
+         * NOTE(review): the test below is !wls_size, so the CPU fallback is
+         * taken when there is no WLS -- confirm that matches the intent. */
+        if (info->indirect && !cs->info.wls_size) {
+                struct pipe_transfer *transfer;
+                uint32_t *params = pipe_buffer_map_range(pipe, info->indirect,
+                                info->indirect_offset, 3 * sizeof(uint32_t),
+                                PIPE_MAP_READ, &transfer);
+
+                /* A failed map leaves no dimensions to read; drop the dispatch */
+                if (!params)
+                        return;
+
+                struct pipe_grid_info direct = *info;
+                direct.indirect = NULL;
+                direct.grid[0] = params[0];
+                direct.grid[1] = params[1];
+                direct.grid[2] = params[2];
+                pipe_buffer_unmap(pipe, transfer);
+
+                /* params must not be read after the unmap; test the copy */
+                if (direct.grid[0] && direct.grid[1] && direct.grid[2])
+                        panfrost_launch_grid(pipe, &direct);
+
+                return;
+        }
+
+        ctx->compute_grid = info;
+
+        struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
+
+        /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
+         * reuse the graphics path for this by lowering to Gallium */
+
+        struct pipe_constant_buffer ubuf = {
+                .buffer = NULL,
+                .buffer_offset = 0,
+                .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem,
+                .user_buffer = info->input
+        };
+
+        if (info->input)
+                pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf);
+
+        /* Invoke according to the grid info */
+
+        void *invocation = pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
+        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
+
+        if (info->indirect)
+                num_wg[0] = num_wg[1] = num_wg[2] = 1;
+
+        panfrost_pack_work_groups_compute(invocation,
+                                          num_wg[0], num_wg[1], num_wg[2],
+                                          info->block[0], info->block[1], info->block[2],
+                                          false, info->indirect != NULL);
+
+        pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
+                cfg.job_task_split =
+                        util_logbase2_ceil(info->block[0] + 1) +
+                        util_logbase2_ceil(info->block[1] + 1) +
+                        util_logbase2_ceil(info->block[2] + 1);
+        }
+
+        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
+                cfg.draw_descriptor_is_64b = true;
+                if (!pan_is_bifrost(dev))
+                        cfg.texture_descriptor_is_64b = true;
+                cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
+                cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE);
+                cfg.thread_storage = panfrost_emit_shared_memory(batch, info);
+                cfg.uniform_buffers = panfrost_emit_const_buf(batch,
+                                PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
+                cfg.textures = panfrost_emit_texture_descriptors(batch, PIPE_SHADER_COMPUTE);
+                cfg.samplers = panfrost_emit_sampler_descriptors(batch, PIPE_SHADER_COMPUTE);
+        }
+
+        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
+
+        unsigned indirect_dep = 0;
+        if (info->indirect) {
+                struct pan_indirect_dispatch_info indirect = {
+                        .job = t.gpu,
+                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
+                                        info->indirect_offset,
+                        .num_wg_sysval = {
+                                batch->num_wg_sysval[0],
+                                batch->num_wg_sysval[1],
+                                batch->num_wg_sysval[2],
+                        },
+                };
+
+                indirect_dep = pan_indirect_dispatch_emit(&batch->pool.base,
+                                                          &batch->scoreboard,
+                                                          &indirect);
+        }
+
+        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
+                         MALI_JOB_TYPE_COMPUTE, true, false,
+                         indirect_dep, 0, &t, false);
+        panfrost_flush_all_batches(ctx);
+}
+
 static void *
 panfrost_create_rasterizer_state(
         struct pipe_context *pctx,
@@ -3552,6 +3677,8 @@ void
 panfrost_cmdstream_context_init(struct pipe_context *pipe)
 {
         pipe->draw_vbo           = panfrost_draw_vbo;
+        pipe->launch_grid        = panfrost_launch_grid;
+
         pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state;
         pipe->create_rasterizer_state = panfrost_create_rasterizer_state;
         pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 8a0d68f..ca5f2bd 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -30,7 +30,6 @@
 #include "pan_cmdstream.h"
 #include "panfrost-quirks.h"
 #include "pan_bo.h"
-#include "pan_indirect_dispatch.h"
 #include "pan_shader.h"
 #include "util/u_memory.h"
 #include "nir_serialize.h"
@@ -91,130 +90,6 @@ panfrost_delete_compute_state(struct pipe_context *pipe, void *cso)
         free(cso);
 }
 
-/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
- * construct the COMPUTE job and some of its payload.
- */
-
-static void
-panfrost_launch_grid(struct pipe_context *pipe,
-                const struct pipe_grid_info *info)
-{
-        struct panfrost_context *ctx = pan_context(pipe);
-        struct panfrost_device *dev = pan_device(pipe->screen);
-
-        /* XXX - shouldn't be necessary with working memory barriers. Affected
-         * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */
-        panfrost_flush_all_batches(ctx);
-
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-
-        struct panfrost_shader_state *cs =
-                &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];
-
-        /* Indirect dispatch can't handle workgroup local storage since that
-         * would require dynamic memory allocation. Bail in this case. */
-        if (info->indirect && !cs->info.wls_size) {
-                struct pipe_transfer *transfer;
-                uint32_t *params = pipe_buffer_map_range(pipe, info->indirect,
-                                info->indirect_offset,
-                                3 * sizeof(uint32_t),
-                                PIPE_MAP_READ,
-                                &transfer);
-
-                struct pipe_grid_info direct = *info;
-                direct.indirect = NULL;
-                direct.grid[0] = params[0];
-                direct.grid[1] = params[1];
-                direct.grid[2] = params[2];
-                pipe_buffer_unmap(pipe, transfer);
-
-                if (params[0] && params[1] && params[2])
-                        panfrost_launch_grid(pipe, &direct);
-
-                return;
-        }
-
-        ctx->compute_grid = info;
-
-        struct panfrost_ptr t =
-                pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
-
-        /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
-         * reuse the graphics path for this by lowering to Gallium */
-
-        struct pipe_constant_buffer ubuf = {
-                .buffer = NULL,
-                .buffer_offset = 0,
-                .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem,
-                .user_buffer = info->input
-        };
-
-        if (info->input)
-                pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf);
-
-        /* Invoke according to the grid info */
-
-        void *invocation =
-                pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
-        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
-
-        if (info->indirect)
-                num_wg[0] = num_wg[1] = num_wg[2] = 1;
-
-        panfrost_pack_work_groups_compute(invocation,
-                                          num_wg[0], num_wg[1], num_wg[2],
-                                          info->block[0], info->block[1],
-                                          info->block[2],
-                                          false, info->indirect != NULL);
-
-        pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
-                cfg.job_task_split =
-                        util_logbase2_ceil(info->block[0] + 1) +
-                        util_logbase2_ceil(info->block[1] + 1) +
-                        util_logbase2_ceil(info->block[2] + 1);
-        }
-
-        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
-                cfg.draw_descriptor_is_64b = true;
-                if (!pan_is_bifrost(dev))
-                        cfg.texture_descriptor_is_64b = true;
-                cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
-                cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE);
-                cfg.thread_storage = panfrost_emit_shared_memory(batch, info);
-                cfg.uniform_buffers = panfrost_emit_const_buf(batch,
-                                PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
-                cfg.textures = panfrost_emit_texture_descriptors(batch,
-                                PIPE_SHADER_COMPUTE);
-                cfg.samplers = panfrost_emit_sampler_descriptors(batch,
-                                PIPE_SHADER_COMPUTE);
-        }
-
-        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
-
-        unsigned indirect_dep = 0;
-        if (info->indirect) {
-                struct pan_indirect_dispatch_info indirect = {
-                        .job = t.gpu,
-                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
-                                        info->indirect_offset,
-                        .num_wg_sysval = {
-                                batch->num_wg_sysval[0],
-                                batch->num_wg_sysval[1],
-                                batch->num_wg_sysval[2],
-                        },
-                };
-
-                indirect_dep = pan_indirect_dispatch_emit(&batch->pool.base,
-                                                          &batch->scoreboard,
-                                                          &indirect);
-        }
-
-        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
-                         MALI_JOB_TYPE_COMPUTE, true, false,
-                         indirect_dep, 0, &t, false);
-        panfrost_flush_all_batches(ctx);
-}
-
 static void
 panfrost_set_compute_resources(struct pipe_context *pctx,
                          unsigned start, unsigned count,
@@ -262,8 +137,6 @@ panfrost_compute_context_init(struct pipe_context *pctx)
         pctx->bind_compute_state = panfrost_bind_compute_state;
         pctx->delete_compute_state = panfrost_delete_compute_state;
 
-        pctx->launch_grid = panfrost_launch_grid;
-
         pctx->set_compute_resources = panfrost_set_compute_resources;
         pctx->set_global_binding = panfrost_set_global_binding;