finish_optimizer_postRA_test();
END_TEST
+
+BEGIN_TEST(optimizer_postRA.dpp)
+ //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm
+ if (!setup_cs("v1 v1 s2 s2", GFX10_3))
+ return;
+
+ bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
+ bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
+ bld.instructions->at(0)->definitions[2].setFixed(vcc);
+ bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0));
+
+ PhysReg reg_v0(256);
+ PhysReg reg_v2(258);
+ Operand a(inputs[0], PhysReg(256));
+ Operand b(inputs[1], PhysReg(257));
+ Operand c(inputs[2], vcc);
+ Operand d(inputs[3], PhysReg(0));
+
+ /* basic optimization */
+ //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
+ //! p_unit_test 0, %res0:v[2]
+ Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
+ writeout(0, Operand(res0, reg_v2));
+
+ /* operand swapping */
+ //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
+ //! p_unit_test 1, %res1:v[2]
+ Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2));
+ writeout(1, Operand(res1, reg_v2));
+
+ //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+ //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
+ //! p_unit_test 2, %res2:v[2]
+ Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
+ writeout(2, Operand(res2, reg_v2));
+
+ /* modifiers */
+ //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
+ //! p_unit_test 3, %res3:v[2]
+ auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ tmp3.instr->dpp().neg[0] = true;
+ Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
+ writeout(3, Operand(res3, reg_v2));
+
+ //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
+ //! p_unit_test 4, %res4:v[2]
+ Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
+ res4.instr->vop3().neg[0] = true;
+ writeout(4, Operand(res4, reg_v2));
+
+ //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+ //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp
+ //! p_unit_test 5, %res5:v[2]
+ Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
+ res5.instr->vop3().clamp = true;
+ writeout(5, Operand(res5, reg_v2));
+
+ //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
+ //! p_unit_test 6, %res6:v[2]
+ auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ tmp6.instr->dpp().neg[0] = true;
+ auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
+ res6.instr->vop3().abs[0] = true;
+ writeout(6, Operand(res6, reg_v2));
+
+ //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
+ //! p_unit_test 7, %res7:v[2]
+ Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
+ res7.instr->vop3().abs[0] = true;
+ writeout(7, Operand(res7, reg_v2));
+
+ /* vcc */
+ //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
+ //! p_unit_test 8, %res8:v[2]
+ Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
+ writeout(8, Operand(res8, reg_v2));
+
+ //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+ //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
+ //! p_unit_test 9, %res9:v[2]
+ Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
+ writeout(9, Operand(res9, reg_v2));
+
+ /* control flow */
+ //! BB1
+ //! /* logical preds: / linear preds: BB0, / kind: uniform, */
+ //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
+ //! p_unit_test 10, %res10:v[2]
+ Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+
+ bld.reset(program->create_and_insert_block());
+ program->blocks[0].linear_succs.push_back(1);
+ program->blocks[1].linear_preds.push_back(0);
+
+ Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
+ writeout(10, Operand(res10, reg_v2));
+
+ /* can't combine if the v_mov_b32's operand is modified */
+ //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+ //! v1: %tmp11_2:v[0] = v_mov_b32 0
+ //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1]
+ //! p_unit_test 11, %res11:v[2], %tmp11_2:v[0]
+ Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+ Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0));
+ Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b);
+ writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0));
+
+ finish_optimizer_postRA_test();
+END_TEST
+