From f3398683f22bd9dd07c993c605b0ab4df8894690 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 6 Aug 2023 22:15:42 -0400 Subject: [PATCH] radeonsi: don't use threadID.yz/blockID.yz for compute_blit if they're always 0 This can improve performance because fewer VGPRs and SGPRs need to be initialized. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_compute_blit.c | 9 ++++++--- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 571fbbb..f1e56d6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -1121,6 +1121,11 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, image[1].u.tex.first_layer = 0; image[1].u.tex.last_layer = util_max_layer(info->dst.resource, info->dst.level); + struct pipe_grid_info grid = {0}; + unsigned wg_dim = + set_work_size(&grid, 8, 8, 1, info->dst.box.width, info->dst.box.height, + info->dst.box.depth); + /* Get the shader key. */ const struct util_format_description *dst_desc = util_format_description(info->dst.format); unsigned i = util_format_get_first_non_void_channel(info->dst.format); @@ -1128,6 +1133,7 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, options.key = 0; options.always_true = true; + options.wg_dim = wg_dim; options.src_is_1d = info->src.resource->target == PIPE_TEXTURE_1D || info->src.resource->target == PIPE_TEXTURE_1D_ARRAY; options.dst_is_1d = info->dst.resource->target == PIPE_TEXTURE_1D || @@ -1177,9 +1183,6 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info, sctx->cs_user_data[1] = (info->src.box.y & 0xffff) | ((info->dst.box.y & 0xffff) << 16); sctx->cs_user_data[2] = (info->src.box.z & 0xffff) | ((info->dst.box.z & 0xffff) << 16); - struct pipe_grid_info grid = {0}; - set_work_size(&grid, 8, 8, 1, info->dst.box.width, info->dst.box.height, info->dst.box.depth); - si_launch_grid_internal_images(sctx, image, 2, &grid, shader, SI_OP_SYNC_BEFORE_AFTER | (info->render_condition_enable ? SI_OP_CS_RENDER_COND_ENABLE : 0)); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1cf4798..bb39a94 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1593,6 +1593,7 @@ union si_compute_blit_shader_key { /* The key saved in _mesa_hash_table_create_u32_keys() can't be 0. */ bool always_true:1; /* Declaration modifiers. */ + uint8_t wg_dim:2; /* 1, 2, or 3 */ bool src_is_1d:1; bool dst_is_1d:1; bool src_is_msaa:1; diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 1a400cd..974ad99 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -438,6 +438,7 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha BITSET_SET(b.shader->info.msaa_images, 0); if (options->dst_is_msaa) BITSET_SET(b.shader->info.msaa_images, 1); + /* TODO: 1D blits are 8x slower because the workgroup size is 8x8 */ b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; b.shader->info.workgroup_size[2] = 1; @@ -462,7 +463,7 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha /* Instructions. */ /* Let's work with 0-based src and dst coordinates (thread IDs) first. */ - nir_def *dst_xyz = get_global_ids(&b, 3); + nir_def *dst_xyz = nir_pad_vector_imm_int(&b, get_global_ids(&b, options->wg_dim), 0, 3); nir_def *src_xyz = dst_xyz; /* Flip src coordinates. */ -- 2.7.4