From: Daniel Schürmann Date: Fri, 4 Feb 2022 16:13:19 +0000 (+0100) Subject: aco: optimize discard_if when WQM is not needed afterwards X-Git-Tag: upstream/22.3.5~12907 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5e9df85b1a4504c5b4162e77e139056dc80accc6;p=platform%2Fupstream%2Fmesa.git aco: optimize discard_if when WQM is not needed afterwards Totals from 11560 (8.57% of 134913) affected shaders: (GFX10.3) CodeSize: 12092560 -> 11997652 (-0.78%) Instrs: 2205325 -> 2181598 (-1.08%) Latency: 15376048 -> 15356958 (-0.12%); split: -0.12%, +0.00% InvThroughput: 3526105 -> 3525120 (-0.03%); split: -0.03%, +0.00% Copies: 98543 -> 87601 (-11.10%) Branches: 16919 -> 16873 (-0.27%) PreSGPRs: 291584 -> 291532 (-0.02%) Reviewed-by: Rhys Perry Part-of: --- diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index c986039..1b0c0b6 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -708,11 +708,29 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio WQMState needs = ctx.handle_wqm ? ctx.info[block->index].instr_needs[idx] : Unspecified; + if (needs == WQM && state != WQM) { + transition_to_WQM(ctx, bld, block->index); + state = WQM; + } else if (needs == Exact && state != Exact) { + transition_to_Exact(ctx, bld, block->index); + state = Exact; + } + if (instr->opcode == aco_opcode::p_discard_if) { - if (ctx.info[block->index].block_needs & Preserve_WQM) { - assert(block->kind & block_kind_top_level); - transition_to_WQM(ctx, bld, block->index); - ctx.info[block->index].exec.back().second &= ~mask_type_global; + Operand current_exec = Operand(exec, bld.lm); + + if (block->kind & block_kind_top_level) { + if (needs == Preserve_WQM) { + /* Preserve the WQM mask */ + transition_to_WQM(ctx, bld, block->index); + ctx.info[block->index].exec.back().second &= ~mask_type_global; + } else if (ctx.info[block->index].exec.size() == 2) { + assert(state == WQM); + /* Transition to Exact without extra instruction */ 
+ ctx.info[block->index].exec.pop_back(); + current_exec = get_exec_op(ctx.info[block->index].exec.back().first); + ctx.info[block->index].exec[0].first = Operand(bld.lm); + } } Temp cond, exit_cond; @@ -727,7 +745,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio cond = instr->operands[0].getTemp(); /* discard from current exec */ exit_cond = bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), - Operand(exec, bld.lm), cond) + current_exec, cond) .def(1) .getTemp(); } @@ -745,15 +763,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio instr->operands[0] = bld.scc(exit_cond); assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0); - } else if (needs == WQM && state != WQM) { - transition_to_WQM(ctx, bld, block->index); - state = WQM; - } else if (needs == Exact && state != Exact) { - transition_to_Exact(ctx, bld, block->index); - state = Exact; - } - - if (instr->opcode == aco_opcode::p_is_helper) { + } else if (instr->opcode == aco_opcode::p_is_helper) { Definition dst = instr->definitions[0]; assert(dst.size() == bld.lm.size()); if (state == Exact) {