nir/load_store_vectorize: improve vectorization with identical operations
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 10 Jun 2020 10:47:55 +0000 (11:47 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 28 Sep 2020 12:06:30 +0000 (12:06 +0000)
An entry might have several identical operations it could be vectorized
with, but only one of them might actually be vectorizable because of
interfering writes.

An example of this is a pattern found in some CTS tests:
a = load(0)
b = load(4)
store(0, a)
store(4, b)
a = load(0)
b = load(4)
store(0, a)
store(4, b)
...

Previously, the pass might have attempted to vectorize the first load(0)
with the second load(4) and, when that attempt failed, never attempted the
other load(4). This changes vectorize_entries() to keep trying the
remaining candidates even if an earlier try_vectorize() failed.

fossil-db (Navi):
Totals from 117 (0.09% of 137413) affected shaders:
SGPRs: 7040 -> 7088 (+0.68%)
CodeSize: 276504 -> 276308 (-0.07%); split: -0.08%, +0.01%
Instrs: 51152 -> 51111 (-0.08%); split: -0.09%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5415>

src/compiler/nir/nir_opt_load_store_vectorize.c

index 370366a..35a6507 100644 (file)
@@ -1144,23 +1144,33 @@ vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct has
             util_dynarray_num_elements(arr, struct entry *),
             sizeof(struct entry *), &sort_entries);
 
-      unsigned i = 0;
-      for (; i < util_dynarray_num_elements(arr, struct entry*) - 1; i++) {
-         struct entry *low = *util_dynarray_element(arr, struct entry *, i);
-         struct entry *high = *util_dynarray_element(arr, struct entry *, i + 1);
+      unsigned num_entries = util_dynarray_num_elements(arr, struct entry *);
 
-         uint64_t diff = high->offset_signed - low->offset_signed;
-         if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
+      for (unsigned first_idx = 0; first_idx < num_entries; first_idx++) {
+         struct entry *low = *util_dynarray_element(arr, struct entry *, first_idx);
+         if (!low)
             continue;
 
-         struct entry *first = low->index < high->index ? low : high;
-         struct entry *second = low->index < high->index ? high : low;
+         for (unsigned second_idx = first_idx + 1; second_idx < num_entries; second_idx++) {
+            struct entry *high = *util_dynarray_element(arr, struct entry *, second_idx);
+            if (!high)
+               continue;
 
-         if (try_vectorize(impl, ctx, low, high, first, second)) {
-            *util_dynarray_element(arr, struct entry *, i) = NULL;
-            *util_dynarray_element(arr, struct entry *, i + 1) = low->is_store ? second : first;
-            progress = true;
+            uint64_t diff = high->offset_signed - low->offset_signed;
+            if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
+               break;
+
+            struct entry *first = low->index < high->index ? low : high;
+            struct entry *second = low->index < high->index ? high : low;
+
+            if (try_vectorize(impl, ctx, low, high, first, second)) {
+               low = low->is_store ? second : first;
+               *util_dynarray_element(arr, struct entry *, second_idx) = NULL;
+               progress = true;
+            }
          }
+
+         *util_dynarray_element(arr, struct entry *, first_idx) = low;
       }
 
       util_dynarray_foreach(arr, struct entry *, elem) {