From 5116646a7636ebc99714d1a0cc41cd402a915220 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 5 Mar 2019 12:46:11 -0800
Subject: [PATCH] nir/algebraic: Recognize open-coded fsat with modifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This change also enables a later change (nir/algebraic: Replace
1-fsat(a) with fsat(1-a)) to affect more shaders.

Almost all of the affected shaders are in Bioshock Infinite, and all of
those shaders all require GLSL 4.10.

All Intel platforms had similar results. (Ice Lake shown)
total instructions in shared programs: 17228584 -> 17228376 (<.01%)
instructions in affected programs: 31438 -> 31230 (-0.66%)
helped: 105
HURT: 0
helped stats (abs) min: 1 max: 5 xÌ: 1.98 xÌ: 1
helped stats (rel) min: 0.08% max: 1.53% xÌ: 0.73% xÌ: 0.70%
95% mean confidence interval for instructions value: -2.20 -1.76
95% mean confidence interval for instructions %-change: -0.80% -0.67%
Instructions are helped.

total cycles in shared programs: 360936431 -> 360935690 (<.01%)
cycles in affected programs: 420100 -> 419359 (-0.18%)
helped: 71
HURT: 21
helped stats (abs) min: 1 max: 160 xÌ: 19.28 xÌ: 10
helped stats (rel) min: <.01% max: 9.78% xÌ: 0.95% xÌ: 0.48%
HURT stats (abs)   min: 1 max: 198 xÌ: 29.90 xÌ: 10
HURT stats (rel)   min: 0.05% max: 8.36% xÌ: 1.24% xÌ: 0.90%
95% mean confidence interval for cycles value: -16.77 0.66
95% mean confidence interval for cycles %-change: -0.85% -0.06%
Inconclusive result (value mean confidence interval includes 0).

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
---
 src/compiler/nir/nir_opt_algebraic.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index c488b2c..ccf57fd 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -105,6 +105,7 @@ optimizations = [
    (('iadd', a, ('iadd', ('ineg', a), b)), b),
    (('~fadd', ('fneg', a), ('fadd', a, b)), b),
    (('~fadd', a, ('fadd', ('fneg', a), b)), b),
+   (('fadd', ('fsat', a), ('fsat', ('fneg', a))), ('fsat', ('fabs', a))),
    (('~fmul', a, 0.0), 0.0),
    (('imul', a, 0), 0),
    (('umul_unorm_4x8', a, 0), 0),
@@ -371,6 +372,8 @@ optimizations = [
    (('imax', a, ('ineg', a)), ('iabs', a)),
    (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
    (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
+   (('~fmin', ('fmax', a, -1.0),  0.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_negate && !options->lower_fsat'),
+   (('~fmax', ('fmin', a,  0.0), -1.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_negate && !options->lower_fsat'),
    (('fsat', ('fsign', a)), ('b2f', ('flt', 0.0, a))),
    (('fsat', ('b2f', a)), ('b2f', a)),
    (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
@@ -646,6 +649,8 @@ optimizations = [
    (('ilt', ('f2u', a), b), ('ilt', ('f2i', a), b)),
    (('ilt', b, ('f2u', a)), ('ilt', b, ('f2i', a))),
 
+   (('~fmin', ('fabs', a), 1.0), ('fsat', ('fabs', a)), '!options->lower_fsat'),
+
    # Packing and then unpacking does nothing
    (('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a),
    (('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
-- 
2.7.4