From ee073cb543ea0e10cc5933caba8a288897c7e4ea Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 10 Jun 2020 11:47:55 +0100
Subject: [PATCH] nir/load_store_vectorize: improve vectorization with identical operations

We might have several identical options to vectorize an entry with, but
only one might be vectorizable because of writes interfering.

An example of this is a pattern found in some CTS tests:
a = load(0)
b = load(4)
store(0, a)
store(4, b)
a = load(0)
b = load(4)
store(0, a)
store(4, b)
...

It might have attempted to vectorize the first load(0) with the second
load(4) without attempting the second load(4) when the first fails.

This changes vectorize_entries() to continue even if the first
try_vectorize() failed.

fossil-db (Navi):
Totals from 117 (0.09% of 137413) affected shaders:
SGPRs: 7040 -> 7088 (+0.68%)
CodeSize: 276504 -> 276308 (-0.07%); split: -0.08%, +0.01%
Instrs: 51152 -> 51111 (-0.08%); split: -0.09%, +0.01%

Signed-off-by: Rhys Perry
Reviewed-by: Matt Turner
Part-of:
---
 src/compiler/nir/nir_opt_load_store_vectorize.c | 34 ++++++++++++++++---------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index 370366a..35a6507 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -1144,23 +1144,33 @@ vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct has
             util_dynarray_num_elements(arr, struct entry *),
             sizeof(struct entry *), &sort_entries);
 
-      unsigned i = 0;
-      for (; i < util_dynarray_num_elements(arr, struct entry*) - 1; i++) {
-         struct entry *low = *util_dynarray_element(arr, struct entry *, i);
-         struct entry *high = *util_dynarray_element(arr, struct entry *, i + 1);
+      unsigned num_entries = util_dynarray_num_elements(arr, struct entry *);
 
-         uint64_t diff = high->offset_signed - low->offset_signed;
-         if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
+      for (unsigned first_idx = 0; first_idx < num_entries; first_idx++) {
+         struct entry *low = *util_dynarray_element(arr, struct entry *, first_idx);
+         if (!low)
             continue;
 
-         struct entry *first = low->index < high->index ? low : high;
-         struct entry *second = low->index < high->index ? high : low;
+         for (unsigned second_idx = first_idx + 1; second_idx < num_entries; second_idx++) {
+            struct entry *high = *util_dynarray_element(arr, struct entry *, second_idx);
+            if (!high)
+               continue;
 
-         if (try_vectorize(impl, ctx, low, high, first, second)) {
-            *util_dynarray_element(arr, struct entry *, i) = NULL;
-            *util_dynarray_element(arr, struct entry *, i + 1) = low->is_store ? second : first;
-            progress = true;
+            uint64_t diff = high->offset_signed - low->offset_signed;
+            if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
+               break;
+
+            struct entry *first = low->index < high->index ? low : high;
+            struct entry *second = low->index < high->index ? high : low;
+
+            if (try_vectorize(impl, ctx, low, high, first, second)) {
+               low = low->is_store ? second : first;
+               *util_dynarray_element(arr, struct entry *, second_idx) = NULL;
+               progress = true;
+            }
          }
+
+         *util_dynarray_element(arr, struct entry *, first_idx) = low;
       }
 
       util_dynarray_foreach(arr, struct entry *, elem) {
-- 
2.7.4