continue;
ssa_info info = ctx.info[instr->operands[i].tempId()];
+ if (!info.is_dpp() || info.instr->pass_flags != instr->pass_flags)
+ continue;
+
aco_opcode swapped_op;
- if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
- (i == 0 || can_swap_operands(instr, &swapped_op)) &&
- can_use_DPP(instr, true, info.is_dpp8()) && !instr->isDPP()) {
- bool dpp8 = info.is_dpp8();
- convert_to_DPP(instr, dpp8);
- if (dpp8) {
- DPP8_instruction* dpp = &instr->dpp8();
- for (unsigned j = 0; j < 8; ++j)
- dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
- if (i) {
- instr->opcode = swapped_op;
- std::swap(instr->operands[0], instr->operands[1]);
- }
- } else {
- DPP16_instruction* dpp = &instr->dpp16();
- if (i) {
- instr->opcode = swapped_op;
- std::swap(instr->operands[0], instr->operands[1]);
- std::swap(dpp->neg[0], dpp->neg[1]);
- std::swap(dpp->abs[0], dpp->abs[1]);
- }
- dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
- dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
- dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
- dpp->abs[0] |= info.instr->dpp16().abs[0];
+ if (i != 0 && !can_swap_operands(instr, &swapped_op))
+ continue;
+
+ if (instr->isDPP() || !can_use_DPP(instr, true, info.is_dpp8()))
+ continue;
+
+ bool dpp8 = info.is_dpp8();
+ bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
+ instr_info.operand_size[(int)instr->opcode] == 32;
+ if (!dpp8 && (info.instr->dpp16().neg[0] || info.instr->dpp16().abs[0]) && !input_mods)
+ continue;
+
+ convert_to_DPP(instr, dpp8);
+ if (dpp8) {
+ DPP8_instruction* dpp = &instr->dpp8();
+ for (unsigned j = 0; j < 8; ++j)
+ dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
+ if (i) {
+ instr->opcode = swapped_op;
+ std::swap(instr->operands[0], instr->operands[1]);
}
- if (--ctx.uses[info.instr->definitions[0].tempId()])
- ctx.uses[info.instr->operands[0].tempId()]++;
- instr->operands[0].setTemp(info.instr->operands[0].getTemp());
- break;
+ } else {
+ DPP16_instruction* dpp = &instr->dpp16();
+ if (i) {
+ instr->opcode = swapped_op;
+ std::swap(instr->operands[0], instr->operands[1]);
+ std::swap(dpp->neg[0], dpp->neg[1]);
+ std::swap(dpp->abs[0], dpp->abs[1]);
+ }
+ dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
+ dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
+ dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
+ dpp->abs[0] |= info.instr->dpp16().abs[0];
}
+
+ if (--ctx.uses[info.instr->definitions[0].tempId()])
+ ctx.uses[info.instr->operands[0].tempId()]++;
+ instr->operands[0].setTemp(info.instr->operands[0].getTemp());
+ break;
}
}
if (i && !can_swap_operands(instr, &instr->opcode))
continue;
+ bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
+ instr_info.operand_size[(int)instr->opcode] == 32;
+ if (!dpp8 && (mov->dpp16().neg[0] || mov->dpp16().abs[0]) && !input_mods)
+ continue;
+
if (!dpp8) /* anything else doesn't make sense in SSA */
assert(mov->dpp16().row_mask == 0xf && mov->dpp16().bank_mask == 0xf);
res7->vop3().abs[0] = true;
writeout(7, res7);
+ //! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
+ //! v1: %res11 = v_add_u32 %tmp11, %b
+ //! p_unit_test 11, %res11
+ auto tmp11 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+ tmp11->dpp16().neg[0] = true;
+ Temp res11 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), tmp11, b);
+ writeout(11, res11);
+
+ //! v1: %tmp12 = v_mov_b32 -%a row_mirror bound_ctrl:1
+ //! v1: %res12 = v_add_f16 %tmp12, %b
+ //! p_unit_test 12, %res12
+ auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+ tmp12->dpp16().neg[0] = true;
+ Temp res12 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1), tmp12, b);
+ writeout(12, res12);
+
/* vcc */
//! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
//! p_unit_test 8, %res8
res7->vop3().abs[0] = true;
writeout(7, Operand(res7, reg_v2));
+ //! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
+ //! v1: %res12:v[2] = v_add_u32 %tmp12:v[2], %b:v[1]
+ //! p_unit_test 12, %res12:v[2]
+ auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ tmp12->dpp16().neg[0] = true;
+ Temp res12 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1, reg_v2), Operand(tmp12, reg_v2), b);
+ writeout(12, Operand(res12, reg_v2));
+
+ //! v1: %tmp13:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
+ //! v1: %res13:v[2] = v_add_f16 %tmp13:v[2], %b:v[1]
+ //! p_unit_test 13, %res13:v[2]
+ auto tmp13 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ tmp13->dpp16().neg[0] = true;
+ Temp res13 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1, reg_v2), Operand(tmp13, reg_v2), b);
+ writeout(13, Operand(res13, reg_v2));
+
/* vcc */
//! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
//! p_unit_test 8, %res8:v[2]