From: Alyssa Rosenzweig Date: Tue, 6 Jul 2021 22:07:26 +0000 (-0400) Subject: panfrost: Move launch_grid to pan_cmdstream X-Git-Tag: upstream/21.2.3~758 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fa2d70aad8964619595eb118ab3aaa325d2ec7a2;p=platform%2Fupstream%2Fmesa.git panfrost: Move launch_grid to pan_cmdstream Same issues apply as for draw_vbo. Signed-off-by: Alyssa Rosenzweig Acked-by: Boris Brezillon Part-of: --- diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 965108a..2c3b849 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -43,6 +43,7 @@ #include "pan_texture.h" #include "pan_util.h" #include "pan_indirect_draw.h" +#include "pan_indirect_dispatch.h" /* Statically assert that PIPE_* enums match the hardware enums. * (As long as they match, we don't need to translate them.) @@ -3133,6 +3134,130 @@ panfrost_draw_vbo(struct pipe_context *pipe, } +/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we + * construct the COMPUTE job and some of its payload. + */ + +static void +panfrost_launch_grid(struct pipe_context *pipe, + const struct pipe_grid_info *info) +{ + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_device *dev = pan_device(pipe->screen); + + /* XXX - shouldn't be necessary with working memory barriers. Affected + * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */ + panfrost_flush_all_batches(ctx); + + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + + struct panfrost_shader_state *cs = + &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0]; + + /* Indirect dispatch can't handle workgroup local storage since that + * would require dynamic memory allocation. Bail in this case. */ + if (info->indirect && !cs->info.wls_size) { + struct pipe_transfer *transfer; + uint32_t *params = pipe_buffer_map_range(pipe, info->indirect, + info->indirect_offset, + 3 * sizeof(uint32_t), + PIPE_MAP_READ, + &transfer); + + struct pipe_grid_info direct = *info; + direct.indirect = NULL; + direct.grid[0] = params[0]; + direct.grid[1] = params[1]; + direct.grid[2] = params[2]; + pipe_buffer_unmap(pipe, transfer); + + if (params[0] && params[1] && params[2]) + panfrost_launch_grid(pipe, &direct); + + return; + } + + ctx->compute_grid = info; + + struct panfrost_ptr t = + pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); + + /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so + * reuse the graphics path for this by lowering to Gallium */ + + struct pipe_constant_buffer ubuf = { + .buffer = NULL, + .buffer_offset = 0, + .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem, + .user_buffer = info->input + }; + + if (info->input) + pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf); + + /* Invoke according to the grid info */ + + void *invocation = + pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION); + unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] }; + + if (info->indirect) + num_wg[0] = num_wg[1] = num_wg[2] = 1; + + panfrost_pack_work_groups_compute(invocation, + num_wg[0], num_wg[1], num_wg[2], + info->block[0], info->block[1], + info->block[2], + false, info->indirect != NULL); + + pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = + util_logbase2_ceil(info->block[0] + 1) + + util_logbase2_ceil(info->block[1] + 1) + + util_logbase2_ceil(info->block[2] + 1); + } + + pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) { + cfg.draw_descriptor_is_64b = true; + if (!pan_is_bifrost(dev)) + cfg.texture_descriptor_is_64b = true; + cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE); + cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE); + cfg.thread_storage = panfrost_emit_shared_memory(batch, info); + cfg.uniform_buffers = panfrost_emit_const_buf(batch, + PIPE_SHADER_COMPUTE, &cfg.push_uniforms); + cfg.textures = panfrost_emit_texture_descriptors(batch, + PIPE_SHADER_COMPUTE); + cfg.samplers = panfrost_emit_sampler_descriptors(batch, + PIPE_SHADER_COMPUTE); + } + + pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg); + + unsigned indirect_dep = 0; + if (info->indirect) { + struct pan_indirect_dispatch_info indirect = { + .job = t.gpu, + .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu + + info->indirect_offset, + .num_wg_sysval = { + batch->num_wg_sysval[0], + batch->num_wg_sysval[1], + batch->num_wg_sysval[2], + }, + }; + + indirect_dep = pan_indirect_dispatch_emit(&batch->pool.base, + &batch->scoreboard, + &indirect); + } + + panfrost_add_job(&batch->pool.base, &batch->scoreboard, + MALI_JOB_TYPE_COMPUTE, true, false, + indirect_dep, 0, &t, false); + panfrost_flush_all_batches(ctx); +} + static void * panfrost_create_rasterizer_state( struct pipe_context *pctx, @@ -3552,6 +3677,8 @@ void panfrost_cmdstream_context_init(struct pipe_context *pipe) { pipe->draw_vbo = panfrost_draw_vbo; + pipe->launch_grid = panfrost_launch_grid; + pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state; pipe->create_rasterizer_state = panfrost_create_rasterizer_state; pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index 8a0d68f..ca5f2bd 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -30,7 +30,6 @@ #include "pan_cmdstream.h" #include "panfrost-quirks.h" #include "pan_bo.h" -#include "pan_indirect_dispatch.h" #include "pan_shader.h" #include "util/u_memory.h" #include "nir_serialize.h" @@ -91,130 +90,6 @@ panfrost_delete_compute_state(struct pipe_context *pipe, void *cso) free(cso); } -/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we - * construct the COMPUTE job and some of its payload. - */ - -static void -panfrost_launch_grid(struct pipe_context *pipe, - const struct pipe_grid_info *info) -{ - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_device *dev = pan_device(pipe->screen); - - /* XXX - shouldn't be necessary with working memory barriers. Affected - * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */ - panfrost_flush_all_batches(ctx); - - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - - struct panfrost_shader_state *cs = - &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0]; - - /* Indirect dispatch can't handle workgroup local storage since that - * would require dynamic memory allocation. Bail in this case. */ - if (info->indirect && !cs->info.wls_size) { - struct pipe_transfer *transfer; - uint32_t *params = pipe_buffer_map_range(pipe, info->indirect, - info->indirect_offset, - 3 * sizeof(uint32_t), - PIPE_MAP_READ, - &transfer); - - struct pipe_grid_info direct = *info; - direct.indirect = NULL; - direct.grid[0] = params[0]; - direct.grid[1] = params[1]; - direct.grid[2] = params[2]; - pipe_buffer_unmap(pipe, transfer); - - if (params[0] && params[1] && params[2]) - panfrost_launch_grid(pipe, &direct); - - return; - } - - ctx->compute_grid = info; - - struct panfrost_ptr t = - pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); - - /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so - * reuse the graphics path for this by lowering to Gallium */ - - struct pipe_constant_buffer ubuf = { - .buffer = NULL, - .buffer_offset = 0, - .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem, - .user_buffer = info->input - }; - - if (info->input) - pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf); - - /* Invoke according to the grid info */ - - void *invocation = - pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION); - unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] }; - - if (info->indirect) - num_wg[0] = num_wg[1] = num_wg[2] = 1; - - panfrost_pack_work_groups_compute(invocation, - num_wg[0], num_wg[1], num_wg[2], - info->block[0], info->block[1], - info->block[2], - false, info->indirect != NULL); - - pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = - util_logbase2_ceil(info->block[0] + 1) + - util_logbase2_ceil(info->block[1] + 1) + - util_logbase2_ceil(info->block[2] + 1); - } - - pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.draw_descriptor_is_64b = true; - if (!pan_is_bifrost(dev)) - cfg.texture_descriptor_is_64b = true; - cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE); - cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE); - cfg.thread_storage = panfrost_emit_shared_memory(batch, info); - cfg.uniform_buffers = panfrost_emit_const_buf(batch, - PIPE_SHADER_COMPUTE, &cfg.push_uniforms); - cfg.textures = panfrost_emit_texture_descriptors(batch, - PIPE_SHADER_COMPUTE); - cfg.samplers = panfrost_emit_sampler_descriptors(batch, - PIPE_SHADER_COMPUTE); - } - - pan_section_pack(t.cpu, COMPUTE_JOB, DRAW_PADDING, cfg); - - unsigned indirect_dep = 0; - if (info->indirect) { - struct pan_indirect_dispatch_info indirect = { - .job = t.gpu, - .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu + - info->indirect_offset, - .num_wg_sysval = { - batch->num_wg_sysval[0], - batch->num_wg_sysval[1], - batch->num_wg_sysval[2], - }, - }; - - indirect_dep = pan_indirect_dispatch_emit(&batch->pool.base, - &batch->scoreboard, - &indirect); - } - - panfrost_add_job(&batch->pool.base, &batch->scoreboard, - MALI_JOB_TYPE_COMPUTE, true, false, - indirect_dep, 0, &t, false); - panfrost_flush_all_batches(ctx); -} - static void panfrost_set_compute_resources(struct pipe_context *pctx, unsigned start, unsigned count, @@ -262,8 +137,6 @@ panfrost_compute_context_init(struct pipe_context *pctx) pctx->bind_compute_state = panfrost_bind_compute_state; pctx->delete_compute_state = panfrost_delete_compute_state; - pctx->launch_grid = panfrost_launch_grid; - pctx->set_compute_resources = panfrost_set_compute_resources; pctx->set_global_binding = panfrost_set_global_binding;