From: Georg Lehmann Date: Sat, 29 Jul 2023 16:47:04 +0000 (+0200) Subject: aco: combine a & ~b to bfi(b, 0, a) X-Git-Tag: upstream/23.3.3~4902 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1659d982c32bb9856c78ef079285ddcdfc6c70ce;p=platform%2Fupstream%2Fmesa.git aco: combine a & ~b to bfi(b, 0, a) Foz-DB Navi21: Totals from 905 (0.68% of 132657) affected shaders: Instrs: 1223583 -> 1221016 (-0.21%); split: -0.22%, +0.01% CodeSize: 6567272 -> 6567064 (-0.00%); split: -0.04%, +0.03% SpillSGPRs: 1231 -> 1223 (-0.65%) SpillVGPRs: 829 -> 823 (-0.72%); split: -1.45%, +0.72% Latency: 40952209 -> 40946230 (-0.01%); split: -0.02%, +0.01% InvThroughput: 9411929 -> 9397932 (-0.15%); split: -0.17%, +0.02% VClause: 29108 -> 29112 (+0.01%); split: -0.04%, +0.05% Copies: 105272 -> 105221 (-0.05%); split: -0.28%, +0.23% Branches: 29330 -> 29329 (-0.00%) Reviewed-by: Rhys Perry Part-of: --- diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 0b79899..16e0450 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3588,6 +3588,45 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr) return false; } +/* v_and(a, not(b)) -> v_bfi_b32(b, 0, a) */ +bool +combine_v_and_not(opt_ctx& ctx, aco_ptr<Instruction>& instr) +{ + if (instr->usesModifiers()) + return false; + + for (unsigned i = 0; i < 2; i++) { + Instruction* op_instr = follow_operand(ctx, instr->operands[i], true); + if (op_instr && !op_instr->usesModifiers() && + (op_instr->opcode == aco_opcode::v_not_b32 || + op_instr->opcode == aco_opcode::s_not_b32)) { + + Operand ops[3] = { + op_instr->operands[0], + Operand::zero(), + instr->operands[!i], + }; + if (!check_vop3_operands(ctx, 3, ops)) + continue; + + Instruction* new_instr = + create_instruction<VALU_instruction>(aco_opcode::v_bfi_b32, Format::VOP3, 3, 1); + + new_instr->operands[0] = copy_operand(ctx, op_instr->operands[0]); + new_instr->operands[1] = Operand::zero(); + new_instr->operands[2] = instr->operands[!i]; 
+ new_instr->definitions[0] = instr->definitions[0]; + new_instr->pass_flags = instr->pass_flags; + instr.reset(new_instr); + decrease_uses(ctx, op_instr); + ctx.info[instr->definitions[0].tempId()].label = 0; + return true; + } + } + + return false; +} + /* v_add_co(c, s_lshl(a, b)) -> v_mad_u32_u24(a, 1< v_mad_u32_u24(b, 1< v_mad_i32_i24(a, -(1<& instr) } else if (instr->opcode == aco_opcode::s_abs_i32) { combine_sabsdiff(ctx, instr); } else if (instr->opcode == aco_opcode::v_and_b32) { - combine_and_subbrev(ctx, instr); + if (combine_and_subbrev(ctx, instr)) { + } else if (combine_v_and_not(ctx, instr)) { + } } else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) { /* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32. * since ctx.uses[mad_info::mul_temp_id] is always 0, we don't have to worry about