From dcad4a2cd122f58336266765d20d8b44e3ee8812 Mon Sep 17 00:00:00 2001
From: =?utf8?q?V=C3=A4in=C3=B6=20M=C3=A4kel=C3=A4?=
 <vaino.o.makela@gmail.com>
Date: Thu, 8 Dec 2022 17:41:32 +0200
Subject: [PATCH] intel/vec4: Set the rounding mode

The rounding mode only needs to be set once, because neither 16-bit
floats nor denorm preservation is supported on the platforms where vec4
is used.

Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20232>
---
 src/intel/compiler/brw_vec4.h             |  2 ++
 src/intel/compiler/brw_vec4_generator.cpp | 12 ++++++++++++
 src/intel/compiler/brw_vec4_nir.cpp       |  5 +++++
 src/intel/compiler/brw_vec4_visitor.cpp   | 20 ++++++++++++++++++++
 4 files changed, 39 insertions(+)

diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index 958ed02..12e58fd 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -275,6 +275,8 @@ public:
 
    void resolve_ud_negate(src_reg *reg);
 
+   void emit_shader_float_controls_execution_mode();
+
    bool lower_minmax();
 
    src_reg get_timestamp();
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 286ba83..c6bee01 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -2179,6 +2179,18 @@ generate_code(struct brw_codegen *p,
          brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
          break;
 
+      case SHADER_OPCODE_RND_MODE: {
+         assert(src[0].file == BRW_IMMEDIATE_VALUE);
+         /*
+          * Change the floating-point rounding mode by updating the control
+          * register field defined at bits cr0.0[5-6].
+          */
+         enum brw_rnd_mode mode =
+            (enum brw_rnd_mode) (src[0].d << BRW_CR0_RND_MODE_SHIFT);
+         brw_float_controls_mode(p, mode, BRW_CR0_RND_MODE_MASK);
+      }
+         break;
+
       default:
          unreachable("Unsupported opcode");
       }
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index e2f9230..197306b 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -35,6 +35,11 @@ namespace brw {
 void
 vec4_visitor::emit_nir_code()
 {
+   /* Globally set the rounding mode based on the float controls.  gen7 doesn't
+    * support 16-bit floats, and gen8 switches to scalar VS.  So we don't need
+    * to do any per-instruction mode switching the way the scalar FS does.
+    */
+   emit_shader_float_controls_execution_mode();
    if (nir->num_uniforms > 0)
       nir_setup_uniforms();
 
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index 33b6822..f205baa 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1346,6 +1346,26 @@ vec4_visitor::resolve_ud_negate(src_reg *reg)
    *reg = temp;
 }
 
+static brw_rnd_mode
+brw_rnd_mode_from_execution_mode(unsigned execution_mode)
+{ /* Map a NIR float-controls execution mode to a brw_rnd_mode. */
+   if (nir_has_any_rounding_mode_rtne(execution_mode))
+      return BRW_RND_MODE_RTNE; /* checked first: wins if RTZ is also set */
+   if (nir_has_any_rounding_mode_rtz(execution_mode))
+      return BRW_RND_MODE_RTZ;
+   return BRW_RND_MODE_UNSPECIFIED; /* neither check above matched */
+}
+
+void
+vec4_visitor::emit_shader_float_controls_execution_mode()
+{ /* Emit a SHADER_OPCODE_RND_MODE if any rounding mode is enabled. */
+   unsigned execution_mode = this->nir->info.float_controls_execution_mode;
+   if (nir_has_any_rounding_mode_enabled(execution_mode)) {
+      brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode);
+      emit(SHADER_OPCODE_RND_MODE, dst_null_ud(), brw_imm_d(rnd));
+   } /* no rounding mode enabled: nothing is emitted */
+}
+
 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                            void *log_data,
                            const struct brw_sampler_prog_key_data *key_tex,
-- 
2.7.4