if (ctx.uses[instr->operands[0].tempId()] > 1)
return;
- /* Make sure both SCC and Operand 0 are written by the same instruction. */
+ /* Find the writer instruction of Operand 0. */
Idx wr_idx = last_writer_idx(ctx, instr->operands[0]);
- Idx sccwr_idx = last_writer_idx(ctx, scc, s1);
- if (!wr_idx.found() || wr_idx != sccwr_idx)
+ if (!wr_idx.found())
return;
Instruction* wr_instr = ctx.get(wr_idx);
default: return;
}
+ /* Check whether both SCC and Operand 0 are written by the same instruction. */
+ Idx sccwr_idx = last_writer_idx(ctx, scc, s1);
+ if (wr_idx != sccwr_idx) {
+ /* Bail out if the writer's SCC def is still used, or if its first definition has more than one use. */
+ if (ctx.uses[wr_instr->definitions[1].tempId()] ||
+ ctx.uses[wr_instr->definitions[0].tempId()] > 1)
+ return;
+
+ /* Check whether the operands of the writer are clobbered. */
+ for (const Operand& op : wr_instr->operands) {
+ if (!op.isConstant() && is_clobbered_since(ctx, op, wr_idx))
+ return;
+ }
+
+ aco_opcode pulled_opcode = wr_instr->opcode;
+ if (instr->opcode == aco_opcode::s_cmp_eq_u32 ||
+ instr->opcode == aco_opcode::s_cmp_eq_i32 ||
+ instr->opcode == aco_opcode::s_cmp_eq_u64) {
+ /* When s_cmp_eq is used, it effectively inverts the SCC def.
+ * However, we can't simply invert the opcodes here because that
+ * would change the meaning of the program.
+ */
+ return;
+ }
+
+ Definition scc_def = instr->definitions[0];
+ ctx.uses[wr_instr->definitions[0].tempId()]--;
+
+ /* Copy the writer instruction, but use SCC from the current instr.
+ * This means that the original instruction will be eliminated.
+ */
+ if (wr_instr->format == Format::SOP2) {
+ instr.reset(create_instruction<SOP2_instruction>(pulled_opcode, Format::SOP2, 2, 2));
+ instr->operands[1] = wr_instr->operands[1];
+ } else if (wr_instr->format == Format::SOP1) {
+ instr.reset(create_instruction<SOP1_instruction>(pulled_opcode, Format::SOP1, 1, 2));
+ }
+ instr->definitions[0] = wr_instr->definitions[0];
+ instr->definitions[1] = scc_def;
+ instr->operands[0] = wr_instr->operands[0];
+ return;
+ }
+
/* Use the SCC def from wr_instr */
ctx.uses[instr->operands[0].tempId()]--;
instr->operands[0] = Operand(wr_instr->definitions[1].getTemp(), scc);
//; del d, e, f, g, h, x
{
+ /* SCC is overwritten in between, optimize by pulling down */
+
+ //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
+ //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+ //! s2: %f:vcc = p_cbranch_z %g:scc
+ //! p_unit_test 5, %f:vcc, %h:s[3]
+ auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+ Operand::c32(0x40018u));
+ auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
+ Operand::c32(1u));
+ auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+ Operand::zero());
+ auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
+ writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
+ }
+
+ //; del d, e, f, g, h, x
+
+ {
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
//! p_unit_test 6, %f:s[4]