From: Dave Airlie Date: Fri, 21 Apr 2017 02:38:05 +0000 (+0100) Subject: radv/ac: eliminate unused vertex shader outputs. (v2) X-Git-Tag: upstream/18.1.0~10449 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f205e19e4f8e60ef74aab804a73ba6c2f3904a6c;p=platform%2Fupstream%2Fmesa.git radv/ac: eliminate unused vertex shader outputs. (v2) This is ported from radeonsi, and I can see at least one Talos shader drops an export due to this, and saves some VGPR usage. v2: use shared code. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 97cd981..d9962c7 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -31,6 +31,8 @@ #include "util/bitscan.h" #include #include "ac_shader_info.h" +#include "ac_exp_param.h" + enum radeon_llvm_calling_convention { RADEON_LLVM_AMDGPU_VS = 87, RADEON_LLVM_AMDGPU_GS = 88, @@ -5133,7 +5135,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; int i; - memset(outinfo->vs_output_param_offset, EXP_PARAM_UNDEFINED, + memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset)); if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { @@ -5758,6 +5760,37 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) } static void +ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx) +{ + struct ac_vs_output_info *outinfo; + + if (ctx->stage == MESA_SHADER_FRAGMENT || + ctx->stage == MESA_SHADER_COMPUTE || + ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_GEOMETRY) + return; + + if (ctx->stage == MESA_SHADER_VERTEX) { + if (ctx->options->key.vs.as_ls || + ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->vs.outinfo; + } + + if (ctx->stage == MESA_SHADER_TESS_EVAL) { + if (ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->tes.outinfo; + } + + ac_eliminate_const_vs_outputs(&ctx->ac, + ctx->main_function, + outinfo->vs_output_param_offset, + VARYING_SLOT_MAX, + &outinfo->param_exports); +} + +static void ac_setup_rings(struct nir_to_llvm_context *ctx) { if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) || @@ -5894,6 +5927,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, LLVMBuildRetVoid(ctx.builder); ac_llvm_finalize_module(&ctx); + + ac_nir_eliminate_const_vs_outputs(&ctx); free(ctx.locals); ralloc_free(ctx.defs); ralloc_free(ctx.phis); diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index f77a9b8..9ea3156 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -120,27 +120,15 @@ struct ac_userdata_locations { struct ac_userdata_info shader_data[AC_UD_MAX_UD]; }; -enum { - /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ - EXP_PARAM_OFFSET_0 = 0, - EXP_PARAM_OFFSET_31 = 31, - /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ - EXP_PARAM_DEFAULT_VAL_0000 = 64, - EXP_PARAM_DEFAULT_VAL_0001, - EXP_PARAM_DEFAULT_VAL_1110, - EXP_PARAM_DEFAULT_VAL_1111, - EXP_PARAM_UNDEFINED = 255, -}; - struct ac_vs_output_info { uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; uint8_t clip_dist_mask; uint8_t cull_dist_mask; + uint8_t param_exports; bool writes_pointsize; bool writes_layer; bool writes_viewport_index; uint32_t export_mask; - unsigned param_exports; unsigned pos_exports; }; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index d698913..ce18178 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -41,6 +41,7 @@ #include "ac_nir_to_llvm.h" #include "vk_format.h" #include "util/debug.h" +#include "ac_exp_param.h" void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant); @@ -1874,13 +1875,13 @@ static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline) static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) { uint32_t ps_input_cntl; - if (offset <= EXP_PARAM_OFFSET_31) + if (offset <= AC_EXP_PARAM_OFFSET_31) ps_input_cntl = S_028644_OFFSET(offset); else { /* The input is a DEFAULT_VAL constant. */ - assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 && - offset <= EXP_PARAM_DEFAULT_VAL_1111); - offset -= EXP_PARAM_DEFAULT_VAL_0000; + assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && + offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); + offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset); } @@ -1903,7 +1904,7 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline) if (ps->info.fs.prim_id_input) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; - if (vs_offset != EXP_PARAM_UNDEFINED) { + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); ++ps_offset; } @@ -1911,7 +1912,7 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline) if (ps->info.fs.layer_input) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; - if (vs_offset != EXP_PARAM_UNDEFINED) { + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); ++ps_offset; } @@ -1931,7 +1932,7 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline) continue; vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i]; - if (vs_offset == EXP_PARAM_UNDEFINED) { + if (vs_offset == AC_EXP_PARAM_UNDEFINED) { pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20); ++ps_offset; continue;