From 6a78af1dbb0da2ad2606c489ce91181532845a91 Mon Sep 17 00:00:00 2001
From: Gert Wollny <gert.wollny@collabora.com>
Date: Tue, 11 Apr 2023 14:17:40 +0200
Subject: [PATCH] r600/sfn: make sure f2u32 is lowered late and correctly for
 64 bit floats

With the latest changes in opt_algebraic, f2u32 showed up in the final
code although it should have been lowered before conversion to assembly.

Fixes: b3685f3ba7fddbe73f363ff4d53ca734841e4b06
    nir/algebraic: insert patterns inside optimizations list

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22640>
---
 src/gallium/drivers/r600/sfn/sfn_nir.cpp             | 3 +++
 src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index 773e70a..d3c12ef 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -924,6 +924,9 @@ r600_shader_from_nir(struct r600_context *rctx,
    while (optimize_once(sh))
       ;
 
+   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64)
+      NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
+
    bool late_algebraic_progress;
    do {
       late_algebraic_progress = false;
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
index 6a79cf3..7234c5a 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@@ -212,11 +212,11 @@ class LowerSplit64op : public NirLowerInstruction {
              * rounds, we have to remove the fractional part in the hi bits
              * For values > UINT_MAX the result is undefined */
             auto src = nir_ssa_for_alu_src(b, alu, 0);
-            src = nir_fsub(b, src, nir_ffract(b, src));
+            src = nir_fadd(b, src, nir_fneg(b, nir_ffract(b, src)));
             auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
             auto highval = nir_fmul_imm(b, src, 1.0 / 65536.0);
             auto fract = nir_ffract(b, highval);
-            auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract)));
+            auto high = nir_f2u32(b, nir_f2f32(b, nir_fadd(b, highval, nir_fneg(b, fract))));
             auto lowval = nir_fmul_imm(b, fract, 65536.0);
             auto low = nir_f2u32(b, nir_f2f32(b, lowval));
             return nir_bcsel(b,
-- 
2.7.4