aco: do not eliminate the final exec write in a p_end_with_regs block
author: Qiang Yu <yuq825@gmail.com>
Fri, 15 Sep 2023 08:42:25 +0000 (16:42 +0800)
committer: Marge Bot <emma+marge@anholt.net>
Tue, 10 Oct 2023 02:36:33 +0000 (02:36 +0000)
p_end_with_regs only partially ends the program; the next shader part
needs the exec mask to be set correctly. For example, p_end_wqm will
generate an exec restore from WQM mode after p_end_with_regs.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24973>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_ir.h
src/amd/compiler/aco_ssa_elimination.cpp

index bf28ae1f41ed13a1fa865c0b1a6bbd293f1102bb..313310f27af3a3f585f266f13d29ecf72b753509 100644 (file)
@@ -11003,6 +11003,8 @@ build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs)
       end->operands[i] = regs[i];
 
    ctx->block->instructions.emplace_back(std::move(end));
+
+   ctx->block->kind |= block_kind_end_with_regs;
 }
 
 static void
index 7874096da8aae46db53fdd5404d4a66a97e984e0..71effd82266b1e3c7e35fc22dec2c35edd0139ac 100644 (file)
@@ -1901,6 +1901,7 @@ enum block_kind {
    block_kind_uses_discard = 1 << 12,
    block_kind_resume = 1 << 13,
    block_kind_export_end = 1 << 14,
+   block_kind_end_with_regs = 1 << 15,
 };
 
 struct RegisterDemand {
index 80644140b8bd445777736b73244948a0951959a9..729449fcf7faa5849c3ba1499877485defff5295 100644 (file)
@@ -547,24 +547,29 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)
 {
    /* Check if any successor needs the outgoing exec mask from the current block. */
 
-   bool copy_to_exec = false;
-   bool copy_from_exec = false;
-
-   for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) {
-      copy_to_exec |= successor_phi_info.def.physReg() == exec;
-      copy_from_exec |= successor_phi_info.op.physReg() == exec;
-   }
-
    bool exec_write_used;
-   if (copy_from_exec)
+   if (block.kind & block_kind_end_with_regs) {
+      /* Last block of a program with succeed shader part should respect final exec write. */
       exec_write_used = true;
-   else if (copy_to_exec)
-      exec_write_used = false;
-   else
-      /* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */
-      exec_write_used =
-         std::any_of(block.linear_succs.begin(), block.linear_succs.end(),
-                     [&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; });
+   } else {
+      bool copy_to_exec = false;
+      bool copy_from_exec = false;
+
+      for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) {
+         copy_to_exec |= successor_phi_info.def.physReg() == exec;
+         copy_from_exec |= successor_phi_info.op.physReg() == exec;
+      }
+
+      if (copy_from_exec)
+         exec_write_used = true;
+      else if (copy_to_exec)
+         exec_write_used = false;
+      else
+         /* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */
+         exec_write_used =
+            std::any_of(block.linear_succs.begin(), block.linear_succs.end(),
+                        [&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; });
+   }
 
    /* Collect information about the branching sequence. */