From d0ace287902b1fac4a8ddcb91fac65c99218f01b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Apr 2022 15:06:11 -0500 Subject: [PATCH] nir/lower_int64: Fix [iu]mul_high handling e551040c602d, which added a new mechanism for 64-bit imul which is more efficient on BDW and later Intel hardware, also introduced a bug where we weren't properly walking both X and Y. No idea how testing didn't find this. Fixes: e551040c602d ("nir/glsl: Add another way of doing lower_imul64 for gen8+") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6306 Reviewed-by: Matt Turner Part-of: --- src/compiler/nir/nir_lower_int64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c index 7900431..d5b1612 100644 --- a/src/compiler/nir/nir_lower_int64.c +++ b/src/compiler/nir/nir_lower_int64.c @@ -455,7 +455,7 @@ lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, for (unsigned i = 0; i < 4; i++) { nir_ssa_def *carry = NULL; for (unsigned j = 0; j < 4; j++) { - /* The maximum values of x32[i] and y32[i] are UINT32_MAX so the + /* The maximum values of x32[i] and y32[j] are UINT32_MAX so the * maximum value of tmp is UINT32_MAX * UINT32_MAX. The maximum * value that will fit in tmp is * @@ -466,7 +466,7 @@ lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, * so we're guaranteed that we can add in two more 32-bit values * without overflowing tmp. */ - nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[i]); + nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]); if (res[i + j]) tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j])); -- 2.7.4