aco: Better phi lowering for merge block when else-side is const.
author Timur Kristóf <timur.kristof@gmail.com>
Tue, 21 Feb 2023 14:04:40 +0000 (15:04 +0100)
committer Marge Bot <emma+marge@anholt.net>
Mon, 3 Apr 2023 14:36:07 +0000 (14:36 +0000)
Add a new special case for binary merge blocks to boolean
phi lowering. This special case benefits shaders that
have divergent branches with an empty else block,
for example all NGG culling shaders.

Fossil DB stats on Rembrandt (NGG culling enabled):

Totals from 61778 (45.79% of 134913) affected shaders:
SpillVGPRs: 2268 -> 2284 (+0.71%); split: -1.10%, +1.81%
CodeSize: 164317952 -> 162962772 (-0.82%); split: -0.83%, +0.00%
Instrs: 31249824 -> 30910686 (-1.09%); split: -1.09%, +0.00%
Latency: 154948555 -> 154781097 (-0.11%); split: -0.12%, +0.02%
InvThroughput: 30397664 -> 30370872 (-0.09%); split: -0.13%, +0.04%
VClause: 529239 -> 529229 (-0.00%); split: -0.00%, +0.00%
SClause: 783417 -> 783430 (+0.00%)
Copies: 2627570 -> 2595161 (-1.23%); split: -1.25%, +0.02%
Branches: 976506 -> 976508 (+0.00%); split: -0.00%, +0.00%

Fossil DB stats on GFX11 (NGG culling disabled):

Totals from 895 (0.66% of 134913) affected shaders:
SpillVGPRs: 2258 -> 2322 (+2.83%); split: -0.44%, +3.28%
CodeSize: 6229152 -> 6215880 (-0.21%); split: -0.37%, +0.16%
Scratch: 216576 -> 215808 (-0.35%); split: -0.47%, +0.12%
Instrs: 1202077 -> 1198396 (-0.31%); split: -0.43%, +0.13%
Latency: 15921336 -> 16000561 (+0.50%); split: -0.74%, +1.24%
InvThroughput: 7425765 -> 7474891 (+0.66%); split: -0.67%, +1.33%
VClause: 22976 -> 23008 (+0.14%); split: -0.03%, +0.17%
SClause: 38269 -> 38271 (+0.01%)
Copies: 123244 -> 123896 (+0.53%); split: -0.30%, +0.83%
Branches: 47570 -> 47574 (+0.01%); split: -0.00%, +0.01%

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21493>

src/amd/compiler/aco_lower_phis.cpp

index 6b8f611..f20f4f3 100644 (file)
@@ -193,6 +193,50 @@ build_merge_code(Program* program, ssa_state* state, Block* block, Operand cur)
 }
 
 void
+build_const_else_merge_code(Program* program, Block& invert_block, aco_ptr<Instruction>& phi)
+{
+   /* Lowers a two-operand boolean phi of a merge block whose else-side operand
+    * (operands[1]) is constant: the result is computed in the invert block from the
+    * then-side operand (operands[0]) and exec, and the phi is rewritten to use that result.
+    * This allows us to actually delete the else block when it's empty.
+    */
+   assert(invert_block.kind & block_kind_invert);
+   Builder bld(program);
+   Operand then = phi->operands[0];
+   const Operand els = phi->operands[1];
+
+   /* Only -1 (all lanes true) and 0 (all lanes false) constants are supported here; els must be one. */
+   assert(!then.isConstant() || then.constantEquals(0) || then.constantEquals(-1));
+   assert(els.constantEquals(0) || els.constantEquals(-1));
+
+   if (!then.isConstant()) {
+      /* Then-side operand is not constant: emit a linear phi at the start of the invert block to access it. */
+      bld.reset(&invert_block.instructions, invert_block.instructions.begin());
+      then = bld.pseudo(aco_opcode::p_linear_phi, bld.def(bld.lm), then, Operand(bld.lm));
+   }
+
+   auto after_phis =
+      std::find_if(invert_block.instructions.begin(), invert_block.instructions.end(),
+                   [](const aco_ptr<Instruction>& instr) -> bool { return !is_phi(instr.get()); });
+   bld.reset(&invert_block.instructions, after_phis);
+
+   Temp tmp; /* Merged result; its definition is inserted before the invert block's first non-phi. */
+   if (then.constantEquals(-1) && els.constantEquals(0)) {
+      tmp = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm)); /* then = -1, else = 0: plain copy of exec. */
+   } else { /* Otherwise combine the then-side value with exec: s_and when else is 0, s_orn2 when it is -1. */
+      Builder::WaveSpecificOpcode opc = els.constantEquals(0) ? Builder::s_and : Builder::s_orn2;
+      tmp = bld.sop2(opc, bld.def(bld.lm), bld.def(s1, scc), then, Operand(exec, bld.lm));
+   }
+
+   /* We can't delete the original phi because that'd invalidate the iterator in lower_phis,
+    * so just make it a trivial linear phi whose two operands are both the new temporary.
+    */
+   phi->opcode = aco_opcode::p_linear_phi;
+   phi->operands[0] = Operand(tmp);
+   phi->operands[1] = Operand(tmp);
+}
+
+void
 init_any_pred_defined(Program* program, ssa_state* state, Block* block, aco_ptr<Instruction>& phi)
 {
    std::fill(state->any_pred_defined.begin(), state->any_pred_defined.end(), pred_defined::undef);
@@ -268,6 +312,12 @@ lower_divergent_bool_phi(Program* program, ssa_state* state, Block* block,
       return;
    }
 
+   if (phi->operands.size() == 2 && phi->operands[1].isConstant() &&
+       (block->kind & block_kind_merge)) {
+      build_const_else_merge_code(program, program->blocks[block->linear_idom], phi);
+      return;
+   }
+
    /* do this here to avoid resizing in case of no boolean phis */
    state->visited.resize(program->blocks.size());
    state->outputs.resize(program->blocks.size());