From bd428162b6ddba9ce1e1f22f5e4a55478d6520cf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 6 Sep 2020 20:09:01 -0500 Subject: [PATCH] nir/lower_io: Fix the unknown-array-index case in get_deref_align MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The current align_mul calculation in the unknown-array-index calculation is align_mul = MIN3(parent_mul, min_pow2_divisor(parent_offset), min_pow2_divisor(stride)) which is certainly correct if parent_offset > 0. However, when parent_offset = 0, min_pow2_divisor(parent_offset) isn't well-defined and our calculation for it is 1 << -1 which isn't well-defined. That said.... it's not actually needed. The offset to the base of the array is array_base = parent_mul * k + parent_offset for some integer k. When we throw in an unknown array index i, we get elem = parent_mul * k + parent_offset + stride * i. If we set new_align = MIN2(parent_mul, min_pow2_divisor(stride)), then both parent_mul and stride are divisible by new_align and elem = (parent_mul / new_alig) * new_align * k + (stride / new_align) * new_align * i + parent_offset = new_align * ((parent_mul / new_alig) * k + (stride / new_align) * i) + parent_offset so elem = new_align * j + parent_offset where j = (parent_mul / new_alig) * k + (stride / new_align) * i. That's a very long-winded way of saying that we can delete one parameter from the align_mul calculation and it's still fine. :-) Fixes: 480329cf8b31 "nir: Add a helper for getting the alignment of a deref" Reviewed-by: Boris Brezillon Reviewed-by: Alejandro Piñeiro Tested-by: Alejandro Piñeiro Part-of: --- src/compiler/nir/nir_lower_io.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 5cda11e..d7a095c 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1425,10 +1425,8 @@ nir_get_explicit_deref_align(nir_deref_instr *deref, /* If this is a wildcard or an indirect deref, we have to go with the * power-of-two gcd. */ - *align_mul = MIN3(parent_mul, - 1 << (ffs(parent_offset) - 1), - 1 << (ffs(stride) - 1)); - *align_offset = 0; + *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1)); + *align_offset = parent_offset % *align_mul; } return true; } -- 2.7.4