radeonsi: follow shader_info.float_controls_execution_mode (mostly)
author: Marek Olšák <marek.olsak@amd.com>
Tue, 19 Jul 2022 02:13:57 +0000 (22:13 -0400)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 3 Aug 2022 00:57:16 +0000 (00:57 +0000)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17864>

src/amd/common/ac_binary.c
src/amd/registers/gfx10.json
src/amd/registers/gfx103.json
src/amd/registers/gfx11.json
src/amd/registers/gfx6.json
src/amd/registers/gfx7.json
src/amd/registers/gfx8.json
src/amd/registers/gfx81.json
src/amd/registers/gfx9.json
src/amd/registers/parse_kernel_headers.py
src/gallium/drivers/radeonsi/si_shader.c

index 3fa0cc8..96c83fa 100644 (file)
@@ -138,6 +138,6 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
     * - denormals break v_mad_f32
     * - GFX6 & GFX7 would be very slow
     */
-   conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
-   conf->float_mode |= V_00B028_FP_64_DENORMS;
+   conf->float_mode &= ~V_00B028_FP_32_DENORMS;
+   conf->float_mode |= V_00B028_FP_16_64_DENORMS;
 }
index 53edb48..5bbaf86 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 4a83aff..b2c0290 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 334c1b1..38308f0 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 0384b5b..a9ea8a6 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 2c29aac..556e893 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index e0c4eab..1ad0450 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 667e8fa..c8c834d 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 8ce7241..6d0f3c9 100644 (file)
   },
   "FLOAT_MODE": {
    "entries": [
+    {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+    {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
     {"name": "FP_32_DENORMS", "value": 48},
-    {"name": "FP_64_DENORMS", "value": 192},
-    {"name": "FP_ALL_DENORMS", "value": 240}
+    {"name": "FP_16_64_DENORMS", "value": 192}
    ]
   },
   "ForceControl": {
index 4bae195..67883f4 100644 (file)
@@ -418,9 +418,10 @@ VRSHtileEncoding = {
 missing_enums_all = {
   'FLOAT_MODE': {
     "entries": [
+      {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+      {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
       {"name": "FP_32_DENORMS", "value": 48},
-      {"name": "FP_64_DENORMS", "value": 192},
-      {"name": "FP_ALL_DENORMS", "value": 240}
+      {"name": "FP_16_64_DENORMS", "value": 192},
     ]
   },
   'QUANT_MODE': {
index 531fc60..63cafa8 100644 (file)
@@ -1839,6 +1839,33 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
    shader->info.uses_instanceid = sel->info.uses_instanceid;
    shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
 
+   /* Set the FP ALU behavior. */
+   /* By default, we disable denormals for FP32 and enable them for FP16 and FP64
+    * for performance and correctness reasons. FP32 denormals can't be enabled because
+    * they break output modifiers and v_mad_f32 and are very slow on GFX6-7.
+    *
+    * float_controls_execution_mode defines the set of valid behaviors. Contradicting flags
+    * can be set simultaneously, which nominally lets us pick either behavior; in practice
+    * the choice is constrained because some options cause GLCTS failures.
+    */
+   unsigned float_mode = V_00B028_FP_16_64_DENORMS;
+
+   if (!(nir->info.float_controls_execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) &&
+       nir->info.float_controls_execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32)
+      float_mode |= V_00B028_FP_32_ROUND_TOWARDS_ZERO;
+
+   if (!(nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 |
+                                                    FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64)) &&
+       nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 |
+                                                  FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64))
+      float_mode |= V_00B028_FP_16_64_ROUND_TOWARDS_ZERO;
+
+   if (!(nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_DENORM_PRESERVE_FP16 |
+                                                    FLOAT_CONTROLS_DENORM_PRESERVE_FP64)) &&
+       nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 |
+                                                  FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64))
+      float_mode &= ~V_00B028_FP_16_64_DENORMS;
+
    /* TODO: ACO could compile non-monolithic shaders here (starting
     * with PS and NGG VS), but monolithic shaders should be compiled
     * by LLVM due to more complicated compilation.
@@ -1846,6 +1873,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
    if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir))
       return false;
 
+   shader->config.float_mode = float_mode;
+
    /* The GS copy shader is compiled next. */
    if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
       shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);