From 50d335804fb9cfeb20f20b1c031e39a6d239791b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 5 Sep 2020 21:09:38 -0400 Subject: [PATCH] nir/algebraic: add late optimizations that optimize out mediump conversions (v3) v2: move *2*mp patterns to the end of late_optimizations v3: remove ftrunc from the optimizations to fix: dEQP-GLES3.functional.shaders.builtin_functions.common.modf.vec2_lowp_vertex Reviewed-by: Rob Clark (v1) Part-of: --- .gitlab-ci/traces-freedreno.yml | 2 +- src/compiler/nir/nir_opt_algebraic.py | 50 +++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml index a22e20f..163833a 100644 --- a/.gitlab-ci/traces-freedreno.yml +++ b/.gitlab-ci/traces-freedreno.yml @@ -230,7 +230,7 @@ traces: - path: glmark2/terrain.rdc expectations: - device: freedreno-a630 - checksum: 2368b3132a8768bc3a98b3fda0a4830e + checksum: 114f7dfe97768d9c565a29f656c8f9cf - path: glmark2/texture-texture-filter=linear.rdc expectations: - device: freedreno-a630 diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f90c1df..c948faf 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2126,17 +2126,6 @@ late_optimizations = [ (('~fadd', ('ffma(is_used_once)', a, b, ('fmul', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), ('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - # Convert *2*mp instructions to concrete *2*16 instructions. At this point - # any conversions that could have been removed will have been removed in - # nir_opt_algebraic so any remaining ones are required. 
- (('f2fmp', a), ('f2f16', a)),
- (('f2imp', a), ('f2i16', a)),
- (('f2ump', a), ('f2u16', a)),
- (('i2imp', a), ('i2i16', a)),
- (('i2fmp', a), ('i2f16', a)),
- (('i2imp', a), ('u2u16', a)),
- (('u2fmp', a), ('u2f16', a)),
- # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says: # # If bits is zero, the result will be zero. @@ -2199,6 +2188,45 @@ for op in ['ffma']: (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)), ] +# mediump: If an opcode is surrounded by conversions, remove the conversions. +# The rationale is that type conversions + the low precision opcode are more +# expensive than the same arithmetic opcode at higher precision. +# +# This must be done in late optimizations, because we need normal optimizations to +# first eliminate temporary up-conversions such as in op1(f2fmp(f2f32(op2()))). +# +# Unary opcodes +for op in ['fabs', 'fceil', 'fcos', 'fddx', 'fddx_coarse', 'fddx_fine', 'fddy', + 'fddy_coarse', 'fddy_fine', 'fexp2', 'ffloor', 'ffract', 'flog2', 'fneg', + 'frcp', 'fround_even', 'frsq', 'fsat', 'fsign', 'fsin', 'fsqrt']: + late_optimizations += [(('~f2f32', (op, ('f2fmp', a))), (op, a))] + +# Binary opcodes +for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']: + late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b))] + +# Ternary opcodes +for op in ['ffma', 'flrp']: + late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c))] + +# Comparison opcodes +for op in ['feq', 'fge', 'flt', 'fneu']: + late_optimizations += [(('~' + op, ('f2fmp', a), ('f2fmp', b)), (op, a, b))] + +# Do this last, so that the f2fmp patterns above have effect. +late_optimizations += [ + # Convert *2*mp instructions to concrete *2*16 instructions. At this point + # any conversions that could have been removed will have been removed in + # nir_opt_algebraic so any remaining ones are required.
+ (('f2fmp', a), ('f2f16', a)),
+ (('f2imp', a), ('f2i16', a)),
+ (('f2ump', a), ('f2u16', a)),
+ (('i2imp', a), ('i2i16', a)),
+ (('i2fmp', a), ('i2f16', a)),
+ (('u2ump', a), ('u2u16', a)),
+ (('u2fmp', a), ('u2f16', a)),
+]
+
 distribute_src_mods = [ # Try to remove some spurious negations rather than pushing them down. (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), -- 2.7.4