From 03a72d96d8dacc32e817089b94bec08ac70b898b Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Fri, 29 Mar 2019 22:51:20 -0500
Subject: [PATCH] nir/algebraic: Drop some @bool specifiers

Now that we have one-bit booleans, we don't need to rely on looking at
parent instructions in order to figure out if a value is a Boolean most
of the time.  We can drop these specifiers and now the optimizations
will apply more generally.

Shader-DB results on Kaby Lake:

    total instructions in shared programs: 15321168 -> 15321227 (<.01%)
    instructions in affected programs: 8836 -> 8895 (0.67%)
    helped: 1
    HURT: 31

    total cycles in shared programs: 357481781 -> 357481321 (<.01%)
    cycles in affected programs: 146524 -> 146064 (-0.31%)
    helped: 22
    HURT: 10

    total spills in shared programs: 23675 -> 23673 (<.01%)
    spills in affected programs: 11 -> 9 (-18.18%)
    helped: 1
    HURT: 0

    total fills in shared programs: 32040 -> 32036 (-0.01%)
    fills in affected programs: 27 -> 23 (-14.81%)
    helped: 1
    HURT: 0

No change in VkPipeline-DB

Looking at the instructions hurt, a bunch of them seem to be a case
where doing exactly the right thing in NIR ends up doing the wrong-ish
thing in the back-end because flags are dumb.  In particular, there's a
case where we have a MUL followed by a CMP followed by a SEL and when we
turn that SEL into an OR, it uses the GRF result of the CMP rather than
the flag result so the CMP can't be merged with the MUL.  Those shaders
appear to schedule better according to the cycle estimates so I guess
it's a win?  Also it helps spilling in one Car Chase compute shader.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
---
 src/compiler/nir/nir_opt_algebraic.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index bdf7078..597d479 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -312,7 +312,7 @@ optimizations = [
    (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, c, 'e')), ('bcsel', b, c, ('bcsel', a, d, 'e'))),
    (('bcsel', a, ('bcsel', b, c, d), ('bcsel(is_used_once)', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
    (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
-   (('bcsel', a, True, 'b@bool'), ('ior', a, b)),
+   (('bcsel', a, True, b), ('ior', a, b)),
    (('fmin', a, a), a),
    (('fmax', a, a), a),
    (('imin', a, a), a),
@@ -390,7 +390,7 @@ optimizations = [
    (('ior', ('uge', 1, a), ('ieq', a, 2)), ('uge', 2, a)),
    (('ior', ('uge', 2, a), ('ieq', a, 3)), ('uge', 3, a)),
 
-   (('ior', 'a@bool', ('ieq', a, False)), True),
+   (('ior', a, ('ieq', a, False)), True),
    (('ior', a, ('inot', a)), -1),
 
    (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', 'a@32', 'b@32'), 0)),
@@ -535,10 +535,10 @@ optimizations = [
    # Boolean simplifications
    (('i2b32(is_used_by_if)', a), ('ine32', a, 0)),
    (('i2b1(is_used_by_if)', a), ('ine', a, 0)),
-   (('ieq', 'a@bool', True), a),
-   (('ine(is_not_used_by_if)', 'a@bool', True), ('inot', a)),
-   (('ine', 'a@bool', False), a),
-   (('ieq(is_not_used_by_if)', 'a@bool', False), ('inot', 'a')),
+   (('ieq', a, True), a),
+   (('ine(is_not_used_by_if)', a, True), ('inot', a)),
+   (('ine', a, False), a),
+   (('ieq(is_not_used_by_if)', a, False), ('inot', 'a')),
    (('bcsel', a, True, False), a),
    (('bcsel', a, False, True), ('inot', a)),
    (('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
@@ -1018,7 +1018,7 @@ late_optimizations = [
    (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
    (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
 
-   (('bcsel', 'a@bool', 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
+   (('bcsel', a, 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
 ]
 
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
-- 
2.7.4