From a406b36d30a7dd2d7623912dd91ec155bd18ed5a Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 21 Jul 2022 16:15:45 +0100
Subject: [PATCH] aco/gfx11: update form_hard_clauses
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

See https://reviews.llvm.org/D127391

fossil-db (gfx1100):
Totals from 116 (0.07% of 161689) affected shaders:
Instrs: 124719 -> 124664 (-0.04%); split: -0.06%, +0.02%
CodeSize: 731660 -> 731440 (-0.03%); split: -0.04%, +0.01%
Latency: 2771695 -> 2771671 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 1050309 -> 1050312 (+0.00%)
VClause: 3731 -> 3779 (+1.29%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_form_hard_clauses.cpp | 202 ++++++++++++++++++++++++++---
 1 file changed, 185 insertions(+), 17 deletions(-)

diff --git a/src/amd/compiler/aco_form_hard_clauses.cpp b/src/amd/compiler/aco_form_hard_clauses.cpp
index ebb9b05..c32a813 100644
--- a/src/amd/compiler/aco_form_hard_clauses.cpp
+++ b/src/amd/compiler/aco_form_hard_clauses.cpp
@@ -32,10 +32,23 @@ namespace {
 
 /* there can also be LDS and VALU clauses, but I don't see how those are interesting */
 enum clause_type {
-   clause_vmem,
-   clause_flat,
    clause_smem,
    clause_other,
+   /* GFX10: */
+   clause_vmem,
+   clause_flat,
+   /* GFX11: */
+   clause_mimg_load,
+   clause_mimg_store,
+   clause_mimg_atomic,
+   clause_mimg_sample,
+   clause_vmem_load,
+   clause_vmem_store,
+   clause_vmem_atomic,
+   clause_flat_load,
+   clause_flat_store,
+   clause_flat_atomic,
+   clause_bvh,
 };
 
 void
@@ -59,6 +72,175 @@ emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction>* instrs)
       bld.insert(std::move(instrs[i]));
 }
 
+clause_type
+get_type(Program* program, aco_ptr<Instruction>& instr)
+{
+   if (instr->isSMEM() && !instr->operands.empty())
+      return clause_smem;
+
+   if (program->gfx_level >= GFX11) {
+      if (instr->isMIMG()) {
+         switch (instr->opcode) {
+         case aco_opcode::image_bvh_intersect_ray:
+         case aco_opcode::image_bvh64_intersect_ray: return clause_bvh;
+         case aco_opcode::image_atomic_swap:
+         case aco_opcode::image_atomic_cmpswap:
+         case aco_opcode::image_atomic_add:
+         case aco_opcode::image_atomic_sub:
+         case aco_opcode::image_atomic_rsub:
+         case aco_opcode::image_atomic_smin:
+         case aco_opcode::image_atomic_umin:
+         case aco_opcode::image_atomic_smax:
+         case aco_opcode::image_atomic_umax:
+         case aco_opcode::image_atomic_and:
+         case aco_opcode::image_atomic_or:
+         case aco_opcode::image_atomic_xor:
+         case aco_opcode::image_atomic_inc:
+         case aco_opcode::image_atomic_dec:
+         case aco_opcode::image_atomic_fcmpswap:
+         case aco_opcode::image_atomic_fmin:
+         case aco_opcode::image_atomic_fmax: return clause_mimg_atomic;
+         default:
+            if (instr->definitions.empty())
+               return clause_mimg_store;
+            else
+               return !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4
+                         ? clause_mimg_sample
+                         : clause_mimg_load;
+         }
+      } else if (instr->isMTBUF() || instr->isScratch()) {
+         return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
+      } else if (instr->isMUBUF()) {
+         switch (instr->opcode) {
+         case aco_opcode::buffer_atomic_add:
+         case aco_opcode::buffer_atomic_and_x2:
+         case aco_opcode::buffer_atomic_rsub:
+         case aco_opcode::buffer_atomic_umax:
+         case aco_opcode::buffer_atomic_dec:
+         case aco_opcode::buffer_atomic_smax:
+         case aco_opcode::buffer_atomic_fmax:
+         case aco_opcode::buffer_atomic_rsub_x2:
+         case aco_opcode::buffer_atomic_smin:
+         case aco_opcode::buffer_atomic_sub:
+         case aco_opcode::buffer_atomic_sub_x2:
+         case aco_opcode::buffer_atomic_xor_x2:
+         case aco_opcode::buffer_atomic_add_f32:
+         case aco_opcode::buffer_atomic_inc:
+         case aco_opcode::buffer_atomic_swap_x2:
+         case aco_opcode::buffer_atomic_cmpswap:
+         case aco_opcode::buffer_atomic_fmin_x2:
+         case aco_opcode::buffer_atomic_umin:
+         case aco_opcode::buffer_atomic_or:
+         case aco_opcode::buffer_atomic_umax_x2:
+         case aco_opcode::buffer_atomic_smin_x2:
+         case aco_opcode::buffer_atomic_umin_x2:
+         case aco_opcode::buffer_atomic_cmpswap_x2:
+         case aco_opcode::buffer_atomic_add_x2:
+         case aco_opcode::buffer_atomic_swap:
+         case aco_opcode::buffer_atomic_and:
+         case aco_opcode::buffer_atomic_fmin:
+         case aco_opcode::buffer_atomic_fcmpswap_x2:
+         case aco_opcode::buffer_atomic_or_x2:
+         case aco_opcode::buffer_atomic_fcmpswap:
+         case aco_opcode::buffer_atomic_xor:
+         case aco_opcode::buffer_atomic_dec_x2:
+         case aco_opcode::buffer_atomic_fmax_x2:
+         case aco_opcode::buffer_atomic_csub:
+         case aco_opcode::buffer_atomic_inc_x2:
+         case aco_opcode::buffer_atomic_smax_x2: return clause_vmem_atomic;
+         default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
+         }
+      } else if (instr->isGlobal()) {
+         switch (instr->opcode) {
+         case aco_opcode::global_atomic_swap:
+         case aco_opcode::global_atomic_umax:
+         case aco_opcode::global_atomic_cmpswap:
+         case aco_opcode::global_atomic_and_x2:
+         case aco_opcode::global_atomic_fmax:
+         case aco_opcode::global_atomic_smax_x2:
+         case aco_opcode::global_atomic_fmax_x2:
+         case aco_opcode::global_atomic_dec:
+         case aco_opcode::global_atomic_dec_x2:
+         case aco_opcode::global_atomic_umin:
+         case aco_opcode::global_atomic_fcmpswap_x2:
+         case aco_opcode::global_atomic_inc:
+         case aco_opcode::global_atomic_and:
+         case aco_opcode::global_atomic_fmin:
+         case aco_opcode::global_atomic_fcmpswap:
+         case aco_opcode::global_atomic_or_x2:
+         case aco_opcode::global_atomic_smax:
+         case aco_opcode::global_atomic_sub:
+         case aco_opcode::global_atomic_xor:
+         case aco_opcode::global_atomic_swap_x2:
+         case aco_opcode::global_atomic_umax_x2:
+         case aco_opcode::global_atomic_umin_x2:
+         case aco_opcode::global_atomic_xor_x2:
+         case aco_opcode::global_atomic_inc_x2:
+         case aco_opcode::global_atomic_fmin_x2:
+         case aco_opcode::global_atomic_add_f32:
+         case aco_opcode::global_atomic_add:
+         case aco_opcode::global_atomic_or:
+         case aco_opcode::global_atomic_add_x2:
+         case aco_opcode::global_atomic_smin_x2:
+         case aco_opcode::global_atomic_smin:
+         case aco_opcode::global_atomic_csub:
+         case aco_opcode::global_atomic_sub_x2:
+         case aco_opcode::global_atomic_cmpswap_x2: return clause_vmem_atomic;
+         default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
+         }
+      } else if (instr->isFlat()) {
+         switch (instr->opcode) {
+         case aco_opcode::flat_atomic_smax:
+         case aco_opcode::flat_atomic_fcmpswap_x2:
+         case aco_opcode::flat_atomic_inc_x2:
+         case aco_opcode::flat_atomic_dec:
+         case aco_opcode::flat_atomic_fmin:
+         case aco_opcode::flat_atomic_umax_x2:
+         case aco_opcode::flat_atomic_add_f32:
+         case aco_opcode::flat_atomic_or:
+         case aco_opcode::flat_atomic_smax_x2:
+         case aco_opcode::flat_atomic_umin:
+         case aco_opcode::flat_atomic_sub:
+         case aco_opcode::flat_atomic_swap:
+         case aco_opcode::flat_atomic_swap_x2:
+         case aco_opcode::flat_atomic_cmpswap_x2:
+         case aco_opcode::flat_atomic_fcmpswap:
+         case aco_opcode::flat_atomic_add:
+         case aco_opcode::flat_atomic_umin_x2:
+         case aco_opcode::flat_atomic_xor_x2:
+         case aco_opcode::flat_atomic_smin:
+         case aco_opcode::flat_atomic_fmax_x2:
+         case aco_opcode::flat_atomic_cmpswap:
+         case aco_opcode::flat_atomic_dec_x2:
+         case aco_opcode::flat_atomic_sub_x2:
+         case aco_opcode::flat_atomic_add_x2:
+         case aco_opcode::flat_atomic_umax:
+         case aco_opcode::flat_atomic_xor:
+         case aco_opcode::flat_atomic_and_x2:
+         case aco_opcode::flat_atomic_inc:
+         case aco_opcode::flat_atomic_and:
+         case aco_opcode::flat_atomic_fmin_x2:
+         case aco_opcode::flat_atomic_smin_x2:
+         case aco_opcode::flat_atomic_or_x2:
+         case aco_opcode::flat_atomic_fmax: return clause_flat_atomic;
+         default: return instr->definitions.empty() ? clause_flat_store : clause_flat_load;
+         }
+      }
+   } else {
+      if (instr->isVMEM() && !instr->operands.empty()) {
+         if (program->gfx_level == GFX10 && instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
+            return clause_other;
+         else
+            return clause_vmem;
+      } else if (instr->isScratch() || instr->isGlobal()) {
+         return clause_vmem;
+      } else if (instr->isFlat()) {
+         return clause_flat;
+      }
+   }
+   return clause_other;
+}
+
 } /* end namespace */
 
 void
@@ -76,21 +258,7 @@ form_hard_clauses(Program* program)
       for (unsigned i = 0; i < block.instructions.size(); i++) {
          aco_ptr<Instruction>& instr = block.instructions[i];
 
-         clause_type type = clause_other;
-         if (instr->isVMEM() && !instr->operands.empty()) {
-            if (program->gfx_level == GFX10 && instr->isMIMG() &&
-                get_mimg_nsa_dwords(instr.get()) > 0)
-               type = clause_other;
-            else
-               type = clause_vmem;
-         } else if (instr->isScratch() || instr->isGlobal()) {
-            type = clause_vmem;
-         } else if (instr->isFlat()) {
-            type = clause_flat;
-         } else if (instr->isSMEM() && !instr->operands.empty()) {
-            type = clause_smem;
-         }
-
+         clause_type type = get_type(program, instr);
          if (type != current_type || num_instrs == 64 ||
              (num_instrs && !should_form_clause(current_instrs[0].get(), instr.get()))) {
             emit_clause(bld, num_instrs, current_instrs);
-- 
2.7.4