From 49ec863e8303170fd2a871689f9d9366215dca7e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 20 Aug 2020 13:29:58 -0700 Subject: [PATCH] freedreno/ir3: Enable the i/o vectorizer on UBOs. This will merge loads of UBO components together into vec4 loads. At the same time, it improves the alignment information on our loads, fixing the regression from the vec3 loads fix. shader-db results: total instructions in shared programs: 12829370 -> 8755851 (-31.75%) total cat6 in shared programs: 145840 -> 97027 (-33.47%) Overall results from before the vec3 fix: total instructions in shared programs: 8019997 -> 8755851 (9.18%) total cat6 in shared programs: 87683 -> 97027 (10.66%) Part-of: --- src/freedreno/ir3/ir3_nir.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 64daa68..2628746 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -145,6 +145,36 @@ ir3_get_compiler_options(struct ir3_compiler *compiler) return &options; } +static bool +ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, + unsigned bit_size, + unsigned num_components, + nir_intrinsic_instr *low, + nir_intrinsic_instr *high) +{ + assert(bit_size >= 8); + if (bit_size != 32) + return false; + unsigned byte_size = bit_size / 8; + + int size = num_components * byte_size; + + /* Don't care about alignment past vec4 (16 bytes): clamp align_mul to 16 and keep only the low 4 bits of align_offset. */ + assert(util_is_power_of_two_nonzero(align_mul)); + align_mul = MIN2(align_mul, 16); + align_offset &= 15; + + /* Our offset alignment should always be at least 4 bytes; refuse to vectorize otherwise. */ + if (align_mul < 4) + return false; + + unsigned worst_start_offset = 16 - align_mul + align_offset; + if (worst_start_offset + size > 16) + return false; + + return true; +} + #define OPT(nir, pass, ...) 
({ \ bool this_progress = false; \ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ @@ -188,6 +218,9 @@ ir3_optimize_loop(nir_shader *s) progress |= OPT(s, nir_lower_pack); progress |= OPT(s, nir_opt_constant_folding); + progress |= OPT(s, nir_opt_load_store_vectorize, nir_var_mem_ubo, + ir3_nir_should_vectorize_mem, 0); + if (lower_flrp != 0) { if (OPT(s, nir_lower_flrp, lower_flrp, -- 2.7.4