aco: Eliminate useless exec writes in jump threading.

author Timur Kristóf <timur.kristof@gmail.com>

Thu, 13 May 2021 18:50:40 +0000 (20:50 +0200)

committer Marge Bot <eric+marge@anholt.net>

Tue, 18 May 2021 11:48:22 +0000 (11:48 +0000)
author Timur Kristóf <timur.kristof@gmail.com>
Thu, 13 May 2021 18:50:40 +0000 (20:50 +0200)
committer Marge Bot <eric+marge@anholt.net>
Tue, 18 May 2021 11:48:22 +0000 (11:48 +0000)
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp

index f4884a4..ca63ed8 100644 (file)
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -40,12 +40,14 @@ struct ssa_elimination_ctx {
     std::vector<std::vector<phi_info_item>> logical_phi_info;
     std::vector<std::vector<phi_info_item>> linear_phi_info;
     std::vector<bool> empty_blocks;
+   std::vector<bool> blocks_incoming_exec_used;
     Program* program;
  
     ssa_elimination_ctx(Program* program_)
        : logical_phi_info(program_->blocks.size())
        , linear_phi_info(program_->blocks.size())
        , empty_blocks(program_->blocks.size(), true)
+      , blocks_incoming_exec_used(program_->blocks.size(), true)
        , program(program_) {}
  };
  
@@ -274,10 +276,84 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
     block->linear_succs.clear();
  }
  
+bool instr_writes_exec(Instruction* instr)
+{
+   for (Definition& def : instr->definitions)
+      if (def.physReg() == exec || def.physReg() == exec_hi)
+         return true;
+
+   return false;
+}
+
+void eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)
+{
+   /* Check if any successor needs the outgoing exec mask from the current block. */
+
+   bool exec_write_used;
+
+   if (!ctx.logical_phi_info[block.index].empty()) {
+      exec_write_used = true;
+   } else {
+      bool will_insert_exec_copy = false;
+      bool will_inserted_exec_copy_need_exec = false;
+
+      for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) {
+         if (successor_phi_info.def.physReg() == exec)
+            will_insert_exec_copy = true;
+         if (successor_phi_info.op.physReg() == exec)
+            will_inserted_exec_copy_need_exec = true;
+      }
+
+      if (will_insert_exec_copy && !will_inserted_exec_copy_need_exec)
+         exec_write_used = false;
+      else
+         /* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */
+         exec_write_used = std::any_of(block.linear_succs.begin(), block.linear_succs.end(),
+                                       [&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; });
+   }
+
+   /* Go through all instructions and eliminate useless exec writes. */
+
+   for (int i = block.instructions.size() - 1; i >= 0; --i) {
+      aco_ptr<Instruction>& instr = block.instructions[i];
+
+      /* We already take information from phis into account before the loop, so let's just break on phis. */
+      if (instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi)
+         break;
+
+      /* See if the current instruction needs or writes exec. */
+      bool needs_exec = needs_exec_mask(instr.get());
+      bool writes_exec = instr_writes_exec(instr.get());
+
+      /* See if we found an unused exec write. */
+      if (writes_exec && !exec_write_used) {
+         instr.reset();
+         continue;
+      }
+
+      /* For a newly encountered exec write, clear the used flag. */
+      if (writes_exec)
+         exec_write_used = false;
+
+      /* If the current instruction needs exec, mark it as used. */
+      exec_write_used |= needs_exec;
+   }
+
+   /* Remember if the current block needs an incoming exec mask from its predecessors. */
+
+   ctx.blocks_incoming_exec_used[block.index] = exec_write_used;
+
+   /* Cleanup: remove deleted instructions from the vector. */
+
+   auto new_end = std::remove(block.instructions.begin(), block.instructions.end(), nullptr);
+   block.instructions.resize(new_end - block.instructions.begin());
+}
+
  void jump_threading(ssa_elimination_ctx& ctx)
  {
     for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
        Block* block = &ctx.program->blocks[i];
+      eliminate_useless_exec_writes_in_block(ctx, *block);
  
        if (!ctx.empty_blocks[i])
           continue;
author	Timur Kristóf <timur.kristof@gmail.com>
	Thu, 13 May 2021 18:50:40 +0000 (20:50 +0200)
committer	Marge Bot <eric+marge@anholt.net>
	Tue, 18 May 2021 11:48:22 +0000 (11:48 +0000)