aco/insert_exec_mask: Simplify WQM handling (2/2)
author Daniel Schürmann <daniel@schuermann.dev>
Mon, 4 Sep 2023 08:07:27 +0000 (10:07 +0200)
committer Marge Bot <emma+marge@anholt.net>
Thu, 14 Sep 2023 09:25:23 +0000 (09:25 +0000)
by calculating WQM requirements on demand.

No fossil-db changes.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25038>

src/amd/compiler/aco_insert_exec_mask.cpp

index 6a31a01..6053874 100644 (file)
@@ -47,18 +47,6 @@ enum mask_type : uint8_t {
    mask_type_loop = 1 << 3, /* active lanes of a loop */
 };
 
-struct wqm_ctx {
-   Program* program;
-   /* state for WQM propagation */
-   std::set<unsigned> worklist;
-   std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */
-   wqm_ctx(Program* program_) : program(program_), branch_wqm(program->blocks.size())
-   {
-      for (unsigned i = 0; i < program->blocks.size(); i++)
-         worklist.insert(i);
-   }
-};
-
 struct loop_info {
    Block* loop_header;
    uint16_t num_exec_masks;
@@ -74,8 +62,6 @@ struct loop_info {
 struct block_info {
    std::vector<std::pair<Operand, uint8_t>>
       exec; /* Vector of exec masks. Either a temporary or const -1. */
-   std::vector<WQMState> instr_needs;
-   uint8_t block_needs;
 };
 
 struct exec_ctx {
@@ -108,70 +94,16 @@ needs_exact(aco_ptr<Instruction>& instr)
    }
 }
 
-void
-mark_block_wqm(wqm_ctx& ctx, unsigned block_idx)
+WQMState
+get_instr_needs(aco_ptr<Instruction>& instr)
 {
-   if (ctx.branch_wqm[block_idx])
-      return;
-
-   for (Block& block : ctx.program->blocks) {
-      if (block.index >= block_idx && block.kind & block_kind_top_level)
-         break;
-      ctx.branch_wqm[block.index] = true;
-      ctx.worklist.insert(block.index);
-   }
-}
-
-void
-get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
-{
-   block_info& info = exec_ctx.info[block->index];
-
-   std::vector<WQMState> instr_needs(block->instructions.size());
-
-   bool propagate_wqm = ctx.branch_wqm[block->index];
-   for (int i = block->instructions.size() - 1; i >= 0; --i) {
-      aco_ptr<Instruction>& instr = block->instructions[i];
-
-      if (instr->opcode == aco_opcode::p_end_wqm)
-         propagate_wqm = true;
+   if (needs_exact(instr))
+      return Exact;
 
-      bool pred_by_exec = needs_exec_mask(instr.get()) ||
-                          instr->opcode == aco_opcode::p_logical_end || instr->isBranch();
-
-      if (needs_exact(instr))
-         instr_needs[i] = Exact;
-      else if (propagate_wqm && pred_by_exec)
-         instr_needs[i] = WQM;
-      else
-         instr_needs[i] = Unspecified;
-
-      info.block_needs |= instr_needs[i];
-   }
-
-   info.instr_needs = instr_needs;
-
-   /* for "if (<cond>) <wqm code>" or "while (<cond>) <wqm code>",
-    * <cond> should be computed in WQM */
-   if (propagate_wqm) {
-      mark_block_wqm(ctx, block->index);
-   }
-}
-
-void
-calculate_wqm_needs(exec_ctx& exec_ctx)
-{
-   wqm_ctx ctx(exec_ctx.program);
-
-   while (!ctx.worklist.empty()) {
-      unsigned block_index = *std::prev(ctx.worklist.end());
-      ctx.worklist.erase(std::prev(ctx.worklist.end()));
-
-      Block& block = exec_ctx.program->blocks[block_index];
-      get_block_needs(ctx, exec_ctx, &block);
-   }
+   bool pred_by_exec = needs_exec_mask(instr.get()) || instr->opcode == aco_opcode::p_logical_end ||
+                       instr->isBranch();
 
-   exec_ctx.handle_wqm = true;
+   return pred_by_exec ? WQM : Unspecified;
 }
 
 Operand
@@ -542,13 +474,13 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
    for (; idx < block->instructions.size(); idx++) {
       aco_ptr<Instruction> instr = std::move(block->instructions[idx]);
 
-      WQMState needs = ctx.handle_wqm ? ctx.info[block->index].instr_needs[idx] : Unspecified;
+      WQMState needs = ctx.handle_wqm ? get_instr_needs(instr) : Unspecified;
 
       if (needs == WQM && state != WQM) {
          transition_to_WQM(ctx, bld, block->index);
          state = WQM;
       } else if (needs == Exact) {
-         if (ctx.info[block->index].block_needs & WQM)
+         if (ctx.handle_wqm)
             handle_atomic_data(ctx, bld, block->index, instr);
          transition_to_Exact(ctx, bld, block->index);
          state = Exact;
@@ -564,7 +496,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
             current_exec = get_exec_op(ctx.info[block->index].exec[0].first);
             ctx.info[block->index].exec[0].first = Operand(bld.lm);
             state = Exact;
-         } else if (ctx.info[block->index].exec.size() >= 2 && needs == WQM) {
+         } else if (ctx.info[block->index].exec.size() >= 2 && ctx.handle_wqm) {
             /* Preserve the WQM mask */
             ctx.info[block->index].exec[1].second &= ~mask_type_global;
          }
@@ -904,7 +836,7 @@ insert_exec_mask(Program* program)
    exec_ctx ctx(program);
 
    if (program->needs_wqm && program->needs_exact)
-      calculate_wqm_needs(ctx);
+      ctx.handle_wqm = true;
 
    for (Block& block : program->blocks)
       process_block(ctx, &block);