From: Marek Olšák Date: Mon, 13 Dec 2021 19:36:37 +0000 (-0500) Subject: radeonsi: move smoothing to the main shader part to remove 1 live VGPR X-Git-Tag: upstream/22.3.5~14316 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=198ad7e4dc7e766ccfbd686bd23962d291c95c95;p=platform%2Fupstream%2Fmesa.git radeonsi: move smoothing to the main shader part to remove 1 live VGPR The samplemask VGPR that we had to pass to the epilog increased VGPR usage by 1 for all shaders. Do it in the main function by using the mono key structure, which causes on-demand compilation and stall, but we'll save the VGPR. 57794 shaders in 35145 tests Totals: SGPRS: 2715856 -> 2716272 (0.02 %) VGPRS: 1776168 -> 1718432 (-3.25 %) Spilled SGPRs: 3704 -> 3630 (-2.00 %) Spilled VGPRs: 1727 -> 1733 (0.35 %) Private memory VGPRs: 256 -> 256 (0.00 %) Scratch size: 2008 -> 2016 (0.40 %) dwords per thread Code Size: 61429584 -> 61393288 (-0.06 %) bytes Max Waves: 838645 -> 840484 (0.22 %) Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c019f71..e965898 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -701,8 +701,6 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) shader->selector->info.writes_z + shader->selector->info.writes_stencil + shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */; - num_returns = MAX2(num_returns, num_return_sgprs + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); - for (i = 0; i < num_return_sgprs; i++) ac_add_return(&ctx->args, AC_ARG_SGPR); for (; i < num_returns; i++) @@ -1249,9 +1247,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) fprintf(f, " epilog.last_cbuf = %u\n", key->ps.part.epilog.last_cbuf); fprintf(f, " epilog.alpha_func = %u\n", key->ps.part.epilog.alpha_func); fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one); - fprintf(f, " epilog.poly_line_smoothing = %u\n", - key->ps.part.epilog.poly_line_smoothing); fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color); + fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing); fprintf(f, " mono.interpolate_at_sample_force_center = %u\n", key->ps.mono.interpolate_at_sample_force_center); fprintf(f, " mono.fbfetch_msaa = %u\n", key->ps.mono.fbfetch_msaa); @@ -1986,12 +1983,6 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_ assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)); } - /* The sample mask input is always enabled, because the API shader always - * passes it through to the epilog. Disable it here if it's unused. - */ - if (!shader->key.ps.part.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask) - shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA; - return true; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 984a53d..881653f8 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -578,7 +578,6 @@ struct si_ps_epilog_bits { unsigned last_cbuf : 3; unsigned alpha_func : 3; unsigned alpha_to_one : 1; - unsigned poly_line_smoothing : 1; unsigned clamp_color : 1; }; @@ -708,6 +707,7 @@ struct si_shader_key_ps { /* Flags for monolithic compilation only. */ struct { + unsigned poly_line_smoothing : 1; unsigned interpolate_at_sample_force_center : 1; unsigned fbfetch_msaa : 1; unsigned fbfetch_is_1D : 1; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index e62ff52..fc923df 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -30,11 +30,6 @@ struct pipe_debug_callback; -/* Ideally pass the sample mask input to the PS epilog as v14, which - * is its usual location, so that the shader doesn't have to add v_mov. - */ -#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14 - struct si_shader_output_values { LLVMValueRef values[4]; ubyte vertex_stream[4]; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 8a14ab9..cbcb293 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -221,23 +221,17 @@ static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha) } } -static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx, LLVMValueRef alpha, - unsigned samplemask_param) +static LLVMValueRef si_get_coverage_from_sample_mask(struct si_shader_context *ctx) { LLVMValueRef coverage; /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ - coverage = LLVMGetParam(ctx->main_fn, samplemask_param); + coverage = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE); coverage = ac_build_bit_count(&ctx->ac, ac_to_integer(&ctx->ac, coverage)); coverage = LLVMBuildUIToFP(ctx->ac.builder, coverage, ctx->ac.f32, ""); - coverage = LLVMBuildFMul(ctx->ac.builder, coverage, - LLVMConstReal(ctx->ac.f32, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), ""); - - if (LLVMTypeOf(alpha) == ctx->ac.f16) - coverage = LLVMBuildFPTrunc(ctx->ac.builder, coverage, ctx->ac.f16, ""); - - return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, ""); + return LLVMBuildFMul(ctx->ac.builder, coverage, + LLVMConstReal(ctx->ac.f32, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), ""); } struct si_ps_exports { @@ -407,8 +401,8 @@ static bool si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue } static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index, - unsigned first_color_export, unsigned samplemask_param, - unsigned color_type, struct si_ps_exports *exp) + unsigned first_color_export, unsigned color_type, + struct si_ps_exports *exp) { int i; @@ -425,10 +419,6 @@ static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col if (index == 0 && ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS) si_alpha_test(ctx, color[3]); - /* Line & polygon smoothing */ - if (ctx->shader->key.ps.part.epilog.poly_line_smoothing) - color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param); - /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ if (ctx->shader->key.ps.part.epilog.last_cbuf > 0) { assert(exp->num == first_color_export); @@ -470,7 +460,7 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi) struct si_shader *shader = ctx->shader; struct si_shader_info *info = &shader->selector->info; LLVMBuilderRef builder = ctx->ac.builder; - unsigned i, j, first_vgpr, vgpr; + unsigned i, j, vgpr; LLVMValueRef *addrs = abi->outputs; LLVMValueRef color[8][4] = {}; @@ -507,6 +497,10 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi) } } + LLVMValueRef smoothing_coverage = NULL; + if (ctx->shader->key.ps.mono.poly_line_smoothing) + smoothing_coverage = si_get_coverage_from_sample_mask(ctx); + /* Fill the return structure. */ ret = ctx->return_value; @@ -516,12 +510,17 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi) SI_SGPR_ALPHA_REF, ""); /* Set VGPRs */ - first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1; + vgpr = SI_SGPR_ALPHA_REF + 1; for (i = 0; i < ARRAY_SIZE(color); i++) { if (!color[i][0]) continue; if (LLVMTypeOf(color[i][0]) == ctx->ac.f16) { + if (smoothing_coverage) { + color[i][3] = LLVMBuildFMul(builder, color[i][3], + LLVMBuildFPTrunc(builder, smoothing_coverage, ctx->ac.f16, ""), ""); + } + for (j = 0; j < 2; j++) { LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2); tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, ""); @@ -529,6 +528,9 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi) } vgpr += 2; } else { + if (smoothing_coverage) + color[i][3] = LLVMBuildFMul(builder, color[i][3], smoothing_coverage, ""); + for (j = 0; j < 4; j++) ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, ""); } @@ -540,12 +542,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi) if (samplemask) ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, ""); - /* Add the input sample mask for smoothing at the end. */ - if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC) - vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC; - ret = LLVMBuildInsertValue(builder, ret, LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE), - vgpr++, ""); - ctx->return_value = ret; } @@ -860,9 +856,6 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part ctx->args.num_sgprs_used + util_bitcount(key->ps_epilog.colors_written) * 4 + key->ps_epilog.writes_z + key->ps_epilog.writes_stencil + key->ps_epilog.writes_samplemask; - required_num_params = - MAX2(required_num_params, ctx->args.num_sgprs_used + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); - while (ctx->args.arg_count < required_num_params) ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); @@ -911,8 +904,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part color[i] = LLVMGetParam(ctx->main_fn, vgpr++); } - si_export_mrt_color(ctx, color, output_index, first_color_export, - ctx->args.arg_count - 1, color_type, &exp); + si_export_mrt_color(ctx, color, output_index, first_color_export, color_type, &exp); } if (exp.num) { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 3fcb0ab..36d8a10 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -275,8 +275,8 @@ static bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); if (sctx->smoothing_enabled != - sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing; + sctx->shader.ps.current->key.ps.mono.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.mono.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); /* NGG cull state uses smoothing_enabled. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 8890ed1..b473f30 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -2196,7 +2196,7 @@ static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_co bool is_line = util_prim_is_lines(sctx->current_rast_prim); key->ps.part.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; - key->ps.part.epilog.poly_line_smoothing = + key->ps.mono.poly_line_smoothing = ((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) && sctx->framebuffer.nr_samples <= 1; }