From 4e09d37f3bd4b2f5837040cb1695d151672944e1 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 14 Feb 2023 10:25:54 -0600 Subject: [PATCH] nir/from_ssa: Move the loop bounds check in resolve_parallel_copy MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We loop, effectively, over two stacks: ready and to_do and finish only when both are empty. In the case where ready is empty, we pull one off of to_do, add a copy to a temporary, and push it onto the ready stack. Previously, we assumed that we would never get to the temporary copy case if to_do has exactly one entry because that would imply that there was only one copy left which means there can't possibly be a cycle to break. This was true until c7fc44f9ebbe ("nir/from_ssa: Respect and populate divergence information") which changed things such that temporary copies sometimes get added in the case where a convergent value is copied both to convergent and divergent destinations. This patch adjusts our loop iteration to always attempt to clear the ready stack before checking if there's anything left on the to_do stack. I also added an assert to make the exit condition more clear. Fixes: c7fc44f9ebbe ("nir/from_ssa: Respect and populate divergence information") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8037 Reviewed-by: Kenneth Graunke Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir_from_ssa.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c index 3893609..c874309 100644 --- a/src/compiler/nir/nir_from_ssa.c +++ b/src/compiler/nir/nir_from_ssa.c @@ -767,7 +767,7 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, ready[++ready_idx] = i; } - while (to_do_idx >= 0) { + while (1) { while (ready_idx >= 0) { int b = ready[ready_idx--]; int a = pred[b]; @@ -793,6 +793,11 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, } } } + + assert(ready_idx < 0); + if (to_do_idx < 0) + break; + int b = to_do[to_do_idx--]; if (pred[b] == -1) continue; @@ -805,6 +810,16 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, * allocation, so we would rather not create extra register * dependencies for the backend to deal with. If it wants, the * backend can coalesce the (possibly multiple) temporaries. + * + * We can also get here in the case where there is no cycle but our + * source value is convergent, is also used as a destination by another + * element of the parallel copy, and all the destinations of the + * parallel copy which copy from it are divergent. In this case, the + * above loop cannot detect that the value has moved due to all the + * divergent destinations and we'll end up emitting a copy to a + * temporary which never gets used. We can avoid this with additional + * tracking or we can just trust the back-end to dead-code the unused + * temporary (which is trivial). */ assert(num_vals < num_copies * 2); nir_register *reg = nir_local_reg_create(state->builder.impl); -- 2.7.4