From 6a78af1dbb0da2ad2606c489ce91181532845a91 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Tue, 11 Apr 2023 14:17:40 +0200 Subject: [PATCH] r600/sfn: make sure f2u32 is lowered late and correctly for 64 bit floats With the latest changes in opt_algebraic we got f2u32 in the final code that should be lowered before conversion to assembly. Fixes: b3685f3ba7fddbe73f363ff4d53ca734841e4b06 nir/algebraic: insert patterns inside optimizations list Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_nir.cpp | 3 +++ src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 773e70a..d3c12ef 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -924,6 +924,9 @@ r600_shader_from_nir(struct r600_context *rctx, while (optimize_once(sh)) ; + if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64) + NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi); + bool late_algebraic_progress; do { late_algebraic_progress = false; diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp index 6a79cf3..7234c5a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp @@ -212,11 +212,11 @@ class LowerSplit64op : public NirLowerInstruction { * rounds, we have to remove the fractional part in the hi bits * For values > UINT_MAX the result is undefined */ auto src = nir_ssa_for_alu_src(b, alu, 0); - src = nir_fsub(b, src, nir_ffract(b, src)); + src = nir_fadd(b, src, nir_fneg(b, nir_ffract(b, src))); auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src); auto highval = nir_fmul_imm(b, src, 1.0 / 65536.0); auto fract = nir_ffract(b, highval); - auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract))); + auto high = nir_f2u32(b, nir_f2f32(b, nir_fadd(b, highval, nir_fneg(b, fract)))); auto lowval = nir_fmul_imm(b, fract, 65536.0); auto low = nir_f2u32(b, nir_f2f32(b, lowval)); return nir_bcsel(b, -- 2.7.4