From a9bce05700380fd5758163ebd46bc1679005743f Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 7 Mar 2022 13:10:47 +0100 Subject: [PATCH] radv: Run copy_prop and dce after folding 16bit sampling/load/store. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Totals from 10 (0.01% of 134913) affected shaders: CodeSize: 53168 -> 54832 (+3.13%); split: -0.17%, +3.30% Instrs: 9117 -> 9200 (+0.91%); split: -1.74%, +2.65% Latency: 41595 -> 41787 (+0.46%); split: -0.95%, +1.41% InvThroughput: 16412 -> 16424 (+0.07%); split: -1.95%, +2.02% VClause: 107 -> 112 (+4.67%); split: -0.93%, +5.61% Copies: 199 -> 535 (+168.84%); split: -3.02%, +171.86% PreVGPRs: 520 -> 502 (-3.46%) Signed-off-by: Georg Lehmann Reviewed-by: Daniel Schürmann Part-of: --- src/amd/vulkan/radv_pipeline.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 728cd8d..35f9917 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -4507,14 +4507,21 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout } if (((stages[i].nir->info.bit_sizes_int | stages[i].nir->info.bit_sizes_float) & 16) && device->physical_device->rad_info.chip_class >= GFX9) { + bool copy_prop = false; uint32_t sampler_dims = UINT32_MAX; /* Skip because AMD doesn't support 16-bit types with these. */ sampler_dims &= ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE); // TODO: also optimize the tex srcs. see radeonSI for reference */ /* Skip if there are potentially conflicting rounding modes */ if (!nir_has_any_rounding_mode_enabled(stages[i].nir->info.float_controls_execution_mode)) - NIR_PASS_V(stages[i].nir, nir_fold_16bit_sampler_conversions, 0, sampler_dims); - NIR_PASS_V(stages[i].nir, nir_fold_16bit_image_load_store_conversions); + NIR_PASS(copy_prop, stages[i].nir, nir_fold_16bit_sampler_conversions, 0, sampler_dims); + NIR_PASS(copy_prop, stages[i].nir, nir_fold_16bit_image_load_store_conversions); + + if (copy_prop) { + NIR_PASS_V(stages[i].nir, nir_copy_prop); + NIR_PASS_V(stages[i].nir, nir_opt_dce); + } + NIR_PASS_V(stages[i].nir, nir_opt_vectorize, opt_vectorize_callback, NULL); } -- 2.7.4