}
static void
-panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
+panfrost_upload_sysvals(struct panfrost_batch *batch,
+ const struct panfrost_ptr *ptr,
struct panfrost_shader_state *ss,
enum pipe_shader_type st)
{
- struct sysval_uniform *uniforms = (void *)buf;
+ struct sysval_uniform *uniforms = ptr->cpu;
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
int sysval = ss->info.sysvals.sysvals[i];
&uniforms[i]);
break;
case PAN_SYSVAL_NUM_WORK_GROUPS:
+ for (unsigned j = 0; j < 3; j++) {
+ batch->num_wg_sysval[j] =
+ ptr->gpu + (i * sizeof(*uniforms)) + (j * 4);
+ }
panfrost_upload_num_work_groups_sysval(batch,
&uniforms[i]);
break;
panfrost_pool_alloc_aligned(&batch->pool, sys_size, 16);
/* Upload sysvals requested by the shader */
- panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
+ panfrost_upload_sysvals(batch, &transfer, ss, stage);
/* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
for (unsigned i = 0; i < ss->info.push.count; ++i) {
struct panfrost_ubo_word src = ss->info.push.words[i];
+ if (src.ubo == sysval_ubo) {
+ unsigned sysval_idx = src.offset / 16;
+ unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
+ if (sysval_type == PAN_SYSVAL_NUM_WORK_GROUPS) {
+ unsigned word = (src.offset % 16) / 4;
+
+ batch->num_wg_sysval[word] = push_transfer.gpu + (4 * i);
+ }
+ }
/* Map the UBO, this should be cheap. However this is reading
* from write-combine memory which is _very_ slow. It might pay
* off to upload sysvals to a staging buffer on the CPU on the
#include "pan_cmdstream.h"
#include "panfrost-quirks.h"
#include "pan_bo.h"
+#include "pan_indirect_dispatch.h"
#include "pan_shader.h"
#include "util/u_memory.h"
#include "nir_serialize.h"
*/
panfrost_batch_reserve_tls(batch, true);
- /* TODO: Indirect compute dispatch */
- assert(!info->indirect);
-
ctx->compute_grid = info;
struct panfrost_ptr t =
void *invocation =
pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
+ unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
+
+ if (info->indirect)
+ num_wg[0] = num_wg[1] = num_wg[2] = 1;
+
panfrost_pack_work_groups_compute(invocation,
- info->grid[0], info->grid[1],
- info->grid[2],
+ num_wg[0], num_wg[1], num_wg[2],
info->block[0], info->block[1],
info->block[2],
false);
pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
+ unsigned indirect_dep = 0;
+ if (info->indirect) {
+ struct pan_indirect_dispatch_info indirect = {
+ .job = t.gpu,
+ .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
+ info->indirect_offset,
+ .num_wg_sysval = {
+ batch->num_wg_sysval[0],
+ batch->num_wg_sysval[1],
+ batch->num_wg_sysval[2],
+ },
+ };
+
+ indirect_dep = pan_indirect_dispatch_emit(&batch->pool,
+ &batch->scoreboard,
+ &indirect);
+ }
+
panfrost_add_job(&batch->pool, &batch->scoreboard,
- MALI_JOB_TYPE_COMPUTE, true, false, 0, 0, &t, true);
+ MALI_JOB_TYPE_COMPUTE, true, false,
+ indirect_dep, 0, &t, false);
panfrost_flush_all_batches(ctx);
}
/* Indirect draw data */
struct panfrost_ptr indirect_draw_ctx;
unsigned indirect_draw_job_id;
+
+ /* GPU addresses where the num_work_groups sysval words live, for indirect dispatch */
+ mali_ptr num_wg_sysval[3];
};
/* Functions for managing the above */
#include "pan_resource.h"
#include "pan_public.h"
#include "pan_util.h"
+#include "pan_indirect_dispatch.h"
#include "pan_indirect_draw.h"
#include "decode.h"
{
struct panfrost_device *dev = pan_device(pscreen);
+ pan_indirect_dispatch_cleanup(dev);
panfrost_cleanup_indirect_draw_shaders(dev);
pan_blitter_cleanup(dev);
pan_blend_shaders_cleanup(dev);
panfrost_resource_screen_init(&screen->base);
pan_blend_shaders_init(dev);
panfrost_init_indirect_draw_shaders(dev);
+ pan_indirect_dispatch_init(dev);
pan_blitter_init(dev);
return &screen->base;