nir: Don't optimize bcsel-of-shuffle across blocks
author: Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 4 Feb 2021 21:45:11 +0000 (15:45 -0600)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 17 Feb 2021 03:59:25 +0000 (03:59 +0000)
We can't move the shuffle to a new block so this only works if the
shuffle and the bcsel are in the same block.  Fortunately, in the
motivating case, this is true.

Also, we have to be careful around discard.  We could try really hard to
avoid moving the shuffles past a discard, but we choose instead to simply
bail if we have already seen a discard in the block.

Fixes: 4ff4d4e56966a40 "nir/opt_intrinsic: Optimize bcsel(b, shuffle..."
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9068>

src/compiler/nir/nir_opt_intrinsics.c

index 1eb2b64..74bad15 100644 (file)
@@ -53,20 +53,31 @@ src_is_single_use_shuffle(nir_src src, nir_ssa_def **data, nir_ssa_def **index)
 }
 
 static nir_ssa_def *
-try_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu)
+try_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu,
+                         bool block_has_discard)
 {
    assert(alu->op == nir_op_bcsel);
 
+   /* If we've seen a discard in this block, don't do the optimization.  We
+    * could try to do something fancy where we check if the shuffle is on our
+    * side of the discard or not but this is good enough for correctness for
+    * now and subgroup ops in the presence of discard aren't common.
+    */
+   if (block_has_discard)
+      return false;
+
    if (!nir_alu_src_is_trivial_ssa(alu, 0))
       return NULL;
 
    nir_ssa_def *data1, *index1;
    if (!nir_alu_src_is_trivial_ssa(alu, 1) ||
+       alu->src[1].src.ssa->parent_instr->block != alu->instr.block ||
        !src_is_single_use_shuffle(alu->src[1].src, &data1, &index1))
       return NULL;
 
    nir_ssa_def *data2, *index2;
    if (!nir_alu_src_is_trivial_ssa(alu, 2) ||
+       alu->src[2].src.ssa->parent_instr->block != alu->instr.block ||
        !src_is_single_use_shuffle(alu->src[2].src, &data2, &index2))
       return NULL;
 
@@ -80,13 +91,14 @@ try_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu)
 }
 
 static bool
-opt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu)
+opt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu,
+                   bool block_has_discard)
 {
    nir_ssa_def *replacement = NULL;
 
    switch (alu->op) {
    case nir_op_bcsel:
-      replacement = try_opt_bcsel_of_shuffle(b, alu);
+      replacement = try_opt_bcsel_of_shuffle(b, alu, block_has_discard);
       break;
 
    default:
@@ -160,20 +172,32 @@ opt_intrinsics_impl(nir_function_impl *impl,
    bool progress = false;
 
    nir_foreach_block(block, impl) {
+      bool block_has_discard = false;
+
       nir_foreach_instr_safe(instr, block) {
          b.cursor = nir_before_instr(instr);
 
          switch (instr->type) {
          case nir_instr_type_alu:
-            if (opt_intrinsics_alu(&b, nir_instr_as_alu(instr)))
+            if (opt_intrinsics_alu(&b, nir_instr_as_alu(instr),
+                                   block_has_discard))
                progress = true;
             break;
 
-         case nir_instr_type_intrinsic:
-            if (opt_intrinsics_intrin(&b, nir_instr_as_intrinsic(instr),
-                                      options))
+         case nir_instr_type_intrinsic: {
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic == nir_intrinsic_discard ||
+                intrin->intrinsic == nir_intrinsic_discard_if ||
+                intrin->intrinsic == nir_intrinsic_demote ||
+                intrin->intrinsic == nir_intrinsic_demote_if ||
+                intrin->intrinsic == nir_intrinsic_terminate ||
+                intrin->intrinsic == nir_intrinsic_terminate_if)
+               block_has_discard = true;
+
+            if (opt_intrinsics_intrin(&b, intrin, options))
                progress = true;
             break;
+         }
 
          default:
             break;