From 11993185a2ca98c3f5d3ec682f4e55ecd3062410 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 3 Nov 2022 18:39:00 -0400 Subject: [PATCH] radeonsi: don't load/resolve/store non-existent src/dst channels in blit shaders RGBX only loads and resolves 3 components, etc. v2: bug fixes to make AMD_TEST=computeblit pass Reviewed-by: Pierre-Eric Pelloux-Prayer (v1) Part-of: --- src/gallium/drivers/radeonsi/si_compute_blit.c | 22 ++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_pipe.h | 3 +++ src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 13 +++++++++++++ 3 files changed, 38 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 48594af..aeb86af 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -1036,6 +1036,22 @@ void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surfac ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, true, &saved_cb); } +/* Return the last component that a compute blit should load and store. */ +static unsigned si_format_get_last_blit_component(enum pipe_format format, bool is_dst) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned num = 0; + + for (unsigned i = 1; i < 4; i++) { + if (desc->swizzle[i] <= PIPE_SWIZZLE_W || + /* If the swizzle is 1 for dst, we need to store 1 explicitly. + * The hardware stores 0 by default. */ + (is_dst && desc->swizzle[i] == PIPE_SWIZZLE_1)) + num = i; + } + return num; +} + bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info) { /* Compute blits require D16 right now (see the ISA). 
@@ -1109,6 +1125,12 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info) options.uint_to_sint = util_format_is_pure_uint(info->src.format) && util_format_is_pure_sint(info->dst.format); options.dst_is_srgb = util_format_is_srgb(info->dst.format); + options.last_dst_channel = si_format_get_last_blit_component(info->dst.format, true); + options.last_src_channel = MIN2(si_format_get_last_blit_component(info->src.format, false), + options.last_dst_channel); + options.use_integer_one = util_format_is_pure_integer(info->dst.format) && + options.last_src_channel < options.last_dst_channel && + options.last_dst_channel == 3; options.fp16_rtz = !util_format_is_pure_integer(info->dst.format) && (dst_desc->channel[i].size <= 10 || (dst_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT && diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 0dcddfa..c8fc1ba 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1579,6 +1579,9 @@ union si_compute_blit_shader_key { bool sint_to_uint:1; bool uint_to_sint:1; bool dst_is_srgb:1; + bool use_integer_one:1; + uint8_t last_src_channel:2; + uint8_t last_dst_channel:2; bool fp16_rtz:1; /* only for equality with pixel shaders, not necessary otherwise */ }; uint32_t key; diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 3192b73..1d074ae 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -392,6 +392,19 @@ static nir_ssa_def *apply_blit_output_modifiers(nir_builder *b, nir_ssa_def *col if (options->dst_is_srgb) color = convert_linear_to_srgb(b, color); + nir_ssa_def *zero = nir_imm_int(b, 0); + nir_ssa_def *one = options->use_integer_one ? nir_imm_int(b, 1) : nir_imm_float(b, 1); + + /* Set channels not present in src to 0 or 1. 
This will eliminate code loading and resolving + * those channels. + */ + for (unsigned chan = options->last_src_channel + 1; chan <= options->last_dst_channel; chan++) + color = nir_vector_insert_imm(b, color, chan == 3 ? one : zero, chan); + + /* Discard channels not present in dst. The hardware fills unstored channels with 0. */ + if (options->last_dst_channel < 3) + color = nir_trim_vector(b, color, options->last_dst_channel + 1); + /* Convert to FP16 with rtz to match the pixel shader. Not necessary, but it helps verify * the behavior of the whole shader by comparing it to the gfx blit. */ -- 2.7.4