panfrost: Hook-up indirect dispatch support
author Boris Brezillon <boris.brezillon@collabora.com>
Mon, 19 Apr 2021 15:58:36 +0000 (17:58 +0200)
committer Marge Bot <eric+marge@anholt.net>
Thu, 22 Apr 2021 16:59:18 +0000 (16:59 +0000)
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10332>

src/gallium/drivers/panfrost/pan_cmdstream.c
src/gallium/drivers/panfrost/pan_compute.c
src/gallium/drivers/panfrost/pan_job.h
src/gallium/drivers/panfrost/pan_screen.c

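diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 1c50dbf..610b787 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c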
@@ -1007,11 +1007,12 @@ panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, unsigned rt,
 }
 
 static void
-panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
+panfrost_upload_sysvals(struct panfrost_batch *batch,
+                        const struct panfrost_ptr *ptr,
                         struct panfrost_shader_state *ss,
                         enum pipe_shader_type st)
 {
-        struct sysval_uniform *uniforms = (void *)buf;
+        struct sysval_uniform *uniforms = ptr->cpu;
 
         for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
                 int sysval = ss->info.sysvals.sysvals[i];
@@ -1036,6 +1037,10 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
                                                     &uniforms[i]);
                         break;
                 case PAN_SYSVAL_NUM_WORK_GROUPS:
+                        for (unsigned j = 0; j < 3; j++) {
+                                batch->num_wg_sysval[j] =
+                                        ptr->gpu + (i * sizeof(*uniforms)) + (j * 4);
+                        }
                         panfrost_upload_num_work_groups_sysval(batch,
                                                                &uniforms[i]);
                         break;
@@ -1115,7 +1120,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
                 panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
 
         /* Upload sysvals requested by the shader */
-        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
+        panfrost_upload_sysvals(batch, &transfer, ss, stage);
 
         /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
         struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
@@ -1171,6 +1176,15 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
         for (unsigned i = 0; i < ss->info.push.count; ++i) {
                 struct panfrost_ubo_word src = ss->info.push.words[i];
 
+                if (src.ubo == sysval_ubo) {
+                        unsigned sysval_idx = src.offset / 16;
+                        unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
+                        if (sysval_type == PAN_SYSVAL_NUM_WORK_GROUPS) {
+                                unsigned word = (src.offset % 16) / 4;
+
+                                batch->num_wg_sysval[word] = push_transfer.gpu + (4 * i);
+                        }
+                }
                 /* Map the UBO, this should be cheap. However this is reading
                  * from write-combine memory which is _very_ slow. It might pay
                  * off to upload sysvals to a staging buffer on the CPU on the
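diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 9624b7d..a2023ac 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c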
@@ -30,6 +30,7 @@
 #include "pan_cmdstream.h"
 #include "panfrost-quirks.h"
 #include "pan_bo.h"
+#include "pan_indirect_dispatch.h"
 #include "pan_shader.h"
 #include "util/u_memory.h"
 #include "nir_serialize.h"
@@ -106,9 +107,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
          */
         panfrost_batch_reserve_tls(batch, true);
 
-        /* TODO: Indirect compute dispatch */
-        assert(!info->indirect);
-
         ctx->compute_grid = info;
 
         struct panfrost_ptr t =
@@ -131,9 +129,13 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         void *invocation =
                 pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
+        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
+
+        if (info->indirect)
+                num_wg[0] = num_wg[1] = num_wg[2] = 1;
+
         panfrost_pack_work_groups_compute(invocation,
-                                          info->grid[0], info->grid[1],
-                                          info->grid[2],
+                                          num_wg[0], num_wg[1], num_wg[2],
                                           info->block[0], info->block[1],
                                           info->block[2],
                                           false);
@@ -162,8 +164,27 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
 
+        unsigned indirect_dep = 0;
+        if (info->indirect) {
+                struct pan_indirect_dispatch_info indirect = {
+                        .job = t.gpu,
+                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
+                                        info->indirect_offset,
+                        .num_wg_sysval = {
+                                batch->num_wg_sysval[0],
+                                batch->num_wg_sysval[1],
+                                batch->num_wg_sysval[2],
+                        },
+                };
+
+                indirect_dep = pan_indirect_dispatch_emit(&batch->pool,
+                                                          &batch->scoreboard,
+                                                          &indirect);
+        }
+
         panfrost_add_job(&batch->pool, &batch->scoreboard,
-                         MALI_JOB_TYPE_COMPUTE, true, false, 0, 0, &t, true);
+                         MALI_JOB_TYPE_COMPUTE, true, false,
+                         indirect_dep, 0, &t, false);
         panfrost_flush_all_batches(ctx);
 }
 
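diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 0c8e96a..6906c7c 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h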
@@ -127,6 +127,9 @@ struct panfrost_batch {
         /* Indirect draw data */
         struct panfrost_ptr indirect_draw_ctx;
         unsigned indirect_draw_job_id;
+
+        /* Keep the num_work_groups sysval around for indirect dispatch */
+        mali_ptr num_wg_sysval[3];
 };
 
 /* Functions for managing the above */
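diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index ed1266b..67d25e8 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c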
@@ -50,6 +50,7 @@
 #include "pan_resource.h"
 #include "pan_public.h"
 #include "pan_util.h"
+#include "pan_indirect_dispatch.h"
 #include "pan_indirect_draw.h"
 #include "decode.h"
 
@@ -696,6 +697,7 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
 {
         struct panfrost_device *dev = pan_device(pscreen);
 
+        pan_indirect_dispatch_cleanup(dev);
         panfrost_cleanup_indirect_draw_shaders(dev);
         pan_blitter_cleanup(dev);
         pan_blend_shaders_cleanup(dev);
@@ -872,6 +874,7 @@ panfrost_create_screen(int fd, struct renderonly *ro)
         panfrost_resource_screen_init(&screen->base);
         pan_blend_shaders_init(dev);
         panfrost_init_indirect_draw_shaders(dev);
+        pan_indirect_dispatch_init(dev);
         pan_blitter_init(dev);
 
         return &screen->base;