aco/gfx11: update form_hard_clauses
author: Rhys Perry <pendingchaos02@gmail.com>
Thu, 21 Jul 2022 15:15:45 +0000 (16:15 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 30 Sep 2022 20:57:02 +0000 (20:57 +0000)
See https://reviews.llvm.org/D127391

fossil-db (gfx1100):
Totals from 116 (0.07% of 161689) affected shaders:
Instrs: 124719 -> 124664 (-0.04%); split: -0.06%, +0.02%
CodeSize: 731660 -> 731440 (-0.03%); split: -0.04%, +0.01%
Latency: 2771695 -> 2771671 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 1050309 -> 1050312 (+0.00%)
VClause: 3731 -> 3779 (+1.29%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17710>

src/amd/compiler/aco_form_hard_clauses.cpp

index ebb9b05..c32a813 100644 (file)
@@ -32,10 +32,23 @@ namespace {
 
 /* there can also be LDS and VALU clauses, but I don't see how those are interesting */
 enum clause_type {
-   clause_vmem,
-   clause_flat,
    clause_smem,
    clause_other,
+   /* GFX10: */
+   clause_vmem,
+   clause_flat,
+   /* GFX11: */
+   clause_mimg_load,
+   clause_mimg_store,
+   clause_mimg_atomic,
+   clause_mimg_sample,
+   clause_vmem_load,
+   clause_vmem_store,
+   clause_vmem_atomic,
+   clause_flat_load,
+   clause_flat_store,
+   clause_flat_atomic,
+   clause_bvh,
 };
 
 void
@@ -59,6 +72,175 @@ emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction>* instrs)
       bld.insert(std::move(instrs[i]));
 }
 
+clause_type
+get_type(Program* program, aco_ptr<Instruction>& instr)
+{
+   if (instr->isSMEM() && !instr->operands.empty())
+      return clause_smem;
+
+   if (program->gfx_level >= GFX11) {
+      if (instr->isMIMG()) {
+         switch (instr->opcode) {
+         case aco_opcode::image_bvh_intersect_ray:
+         case aco_opcode::image_bvh64_intersect_ray: return clause_bvh;
+         case aco_opcode::image_atomic_swap:
+         case aco_opcode::image_atomic_cmpswap:
+         case aco_opcode::image_atomic_add:
+         case aco_opcode::image_atomic_sub:
+         case aco_opcode::image_atomic_rsub:
+         case aco_opcode::image_atomic_smin:
+         case aco_opcode::image_atomic_umin:
+         case aco_opcode::image_atomic_smax:
+         case aco_opcode::image_atomic_umax:
+         case aco_opcode::image_atomic_and:
+         case aco_opcode::image_atomic_or:
+         case aco_opcode::image_atomic_xor:
+         case aco_opcode::image_atomic_inc:
+         case aco_opcode::image_atomic_dec:
+         case aco_opcode::image_atomic_fcmpswap:
+         case aco_opcode::image_atomic_fmin:
+         case aco_opcode::image_atomic_fmax: return clause_mimg_atomic;
+         default:
+            if (instr->definitions.empty())
+               return clause_mimg_store;
+            else
+               return !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4
+                         ? clause_mimg_sample
+                         : clause_mimg_load;
+         }
+      } else if (instr->isMTBUF() || instr->isScratch()) {
+         return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
+      } else if (instr->isMUBUF()) {
+         switch (instr->opcode) {
+         case aco_opcode::buffer_atomic_add:
+         case aco_opcode::buffer_atomic_and_x2:
+         case aco_opcode::buffer_atomic_rsub:
+         case aco_opcode::buffer_atomic_umax:
+         case aco_opcode::buffer_atomic_dec:
+         case aco_opcode::buffer_atomic_smax:
+         case aco_opcode::buffer_atomic_fmax:
+         case aco_opcode::buffer_atomic_rsub_x2:
+         case aco_opcode::buffer_atomic_smin:
+         case aco_opcode::buffer_atomic_sub:
+         case aco_opcode::buffer_atomic_sub_x2:
+         case aco_opcode::buffer_atomic_xor_x2:
+         case aco_opcode::buffer_atomic_add_f32:
+         case aco_opcode::buffer_atomic_inc:
+         case aco_opcode::buffer_atomic_swap_x2:
+         case aco_opcode::buffer_atomic_cmpswap:
+         case aco_opcode::buffer_atomic_fmin_x2:
+         case aco_opcode::buffer_atomic_umin:
+         case aco_opcode::buffer_atomic_or:
+         case aco_opcode::buffer_atomic_umax_x2:
+         case aco_opcode::buffer_atomic_smin_x2:
+         case aco_opcode::buffer_atomic_umin_x2:
+         case aco_opcode::buffer_atomic_cmpswap_x2:
+         case aco_opcode::buffer_atomic_add_x2:
+         case aco_opcode::buffer_atomic_swap:
+         case aco_opcode::buffer_atomic_and:
+         case aco_opcode::buffer_atomic_fmin:
+         case aco_opcode::buffer_atomic_fcmpswap_x2:
+         case aco_opcode::buffer_atomic_or_x2:
+         case aco_opcode::buffer_atomic_fcmpswap:
+         case aco_opcode::buffer_atomic_xor:
+         case aco_opcode::buffer_atomic_dec_x2:
+         case aco_opcode::buffer_atomic_fmax_x2:
+         case aco_opcode::buffer_atomic_csub:
+         case aco_opcode::buffer_atomic_inc_x2:
+         case aco_opcode::buffer_atomic_smax_x2: return clause_vmem_atomic;
+         default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
+         }
+      } else if (instr->isGlobal()) {
+         switch (instr->opcode) {
+         case aco_opcode::global_atomic_swap:
+         case aco_opcode::global_atomic_umax:
+         case aco_opcode::global_atomic_cmpswap:
+         case aco_opcode::global_atomic_and_x2:
+         case aco_opcode::global_atomic_fmax:
+         case aco_opcode::global_atomic_smax_x2:
+         case aco_opcode::global_atomic_fmax_x2:
+         case aco_opcode::global_atomic_dec:
+         case aco_opcode::global_atomic_dec_x2:
+         case aco_opcode::global_atomic_umin:
+         case aco_opcode::global_atomic_fcmpswap_x2:
+         case aco_opcode::global_atomic_inc:
+         case aco_opcode::global_atomic_and:
+         case aco_opcode::global_atomic_fmin:
+         case aco_opcode::global_atomic_fcmpswap:
+         case aco_opcode::global_atomic_or_x2:
+         case aco_opcode::global_atomic_smax:
+         case aco_opcode::global_atomic_sub:
+         case aco_opcode::global_atomic_xor:
+         case aco_opcode::global_atomic_swap_x2:
+         case aco_opcode::global_atomic_umax_x2:
+         case aco_opcode::global_atomic_umin_x2:
+         case aco_opcode::global_atomic_xor_x2:
+         case aco_opcode::global_atomic_inc_x2:
+         case aco_opcode::global_atomic_fmin_x2:
+         case aco_opcode::global_atomic_add_f32:
+         case aco_opcode::global_atomic_add:
+         case aco_opcode::global_atomic_or:
+         case aco_opcode::global_atomic_add_x2:
+         case aco_opcode::global_atomic_smin_x2:
+         case aco_opcode::global_atomic_smin:
+         case aco_opcode::global_atomic_csub:
+         case aco_opcode::global_atomic_sub_x2:
+         case aco_opcode::global_atomic_cmpswap_x2: return clause_vmem_atomic;
+         default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
+         }
+      } else if (instr->isFlat()) {
+         switch (instr->opcode) {
+         case aco_opcode::flat_atomic_smax:
+         case aco_opcode::flat_atomic_fcmpswap_x2:
+         case aco_opcode::flat_atomic_inc_x2:
+         case aco_opcode::flat_atomic_dec:
+         case aco_opcode::flat_atomic_fmin:
+         case aco_opcode::flat_atomic_umax_x2:
+         case aco_opcode::flat_atomic_add_f32:
+         case aco_opcode::flat_atomic_or:
+         case aco_opcode::flat_atomic_smax_x2:
+         case aco_opcode::flat_atomic_umin:
+         case aco_opcode::flat_atomic_sub:
+         case aco_opcode::flat_atomic_swap:
+         case aco_opcode::flat_atomic_swap_x2:
+         case aco_opcode::flat_atomic_cmpswap_x2:
+         case aco_opcode::flat_atomic_fcmpswap:
+         case aco_opcode::flat_atomic_add:
+         case aco_opcode::flat_atomic_umin_x2:
+         case aco_opcode::flat_atomic_xor_x2:
+         case aco_opcode::flat_atomic_smin:
+         case aco_opcode::flat_atomic_fmax_x2:
+         case aco_opcode::flat_atomic_cmpswap:
+         case aco_opcode::flat_atomic_dec_x2:
+         case aco_opcode::flat_atomic_sub_x2:
+         case aco_opcode::flat_atomic_add_x2:
+         case aco_opcode::flat_atomic_umax:
+         case aco_opcode::flat_atomic_xor:
+         case aco_opcode::flat_atomic_and_x2:
+         case aco_opcode::flat_atomic_inc:
+         case aco_opcode::flat_atomic_and:
+         case aco_opcode::flat_atomic_fmin_x2:
+         case aco_opcode::flat_atomic_smin_x2:
+         case aco_opcode::flat_atomic_or_x2:
+         case aco_opcode::flat_atomic_fmax: return clause_flat_atomic;
+         default: return instr->definitions.empty() ? clause_flat_store : clause_flat_load;
+         }
+      }
+   } else {
+      if (instr->isVMEM() && !instr->operands.empty()) {
+         if (program->gfx_level == GFX10 && instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
+            return clause_other;
+         else
+            return clause_vmem;
+      } else if (instr->isScratch() || instr->isGlobal()) {
+         return clause_vmem;
+      } else if (instr->isFlat()) {
+         return clause_flat;
+      }
+   }
+   return clause_other;
+}
+
 } /* end namespace */
 
 void
@@ -76,21 +258,7 @@ form_hard_clauses(Program* program)
       for (unsigned i = 0; i < block.instructions.size(); i++) {
          aco_ptr<Instruction>& instr = block.instructions[i];
 
-         clause_type type = clause_other;
-         if (instr->isVMEM() && !instr->operands.empty()) {
-            if (program->gfx_level == GFX10 && instr->isMIMG() &&
-                get_mimg_nsa_dwords(instr.get()) > 0)
-               type = clause_other;
-            else
-               type = clause_vmem;
-         } else if (instr->isScratch() || instr->isGlobal()) {
-            type = clause_vmem;
-         } else if (instr->isFlat()) {
-            type = clause_flat;
-         } else if (instr->isSMEM() && !instr->operands.empty()) {
-            type = clause_smem;
-         }
-
+         clause_type type = get_type(program, instr);
          if (type != current_type || num_instrs == 64 ||
              (num_instrs && !should_form_clause(current_instrs[0].get(), instr.get()))) {
             emit_clause(bld, num_instrs, current_instrs);