aco: don't apply modifiers through DPP to unsupported instructions
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 8 Feb 2023 16:37:44 +0000 (16:37 +0000)
committer Marge Bot <emma+marge@anholt.net>
Tue, 21 Feb 2023 14:59:38 +0000 (14:59 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21201>

src/amd/compiler/aco_optimizer.cpp
src/amd/compiler/aco_optimizer_postRA.cpp
src/amd/compiler/tests/test_optimizer.cpp
src/amd/compiler/tests/test_optimizer_postRA.cpp

index 4557fd0..2043990 100644 (file)
@@ -4880,38 +4880,49 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             continue;
          ssa_info info = ctx.info[instr->operands[i].tempId()];
 
+         if (!info.is_dpp() || info.instr->pass_flags != instr->pass_flags)
+            continue;
+
          aco_opcode swapped_op;
-         if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
-             (i == 0 || can_swap_operands(instr, &swapped_op)) &&
-             can_use_DPP(instr, true, info.is_dpp8()) && !instr->isDPP()) {
-            bool dpp8 = info.is_dpp8();
-            convert_to_DPP(instr, dpp8);
-            if (dpp8) {
-               DPP8_instruction* dpp = &instr->dpp8();
-               for (unsigned j = 0; j < 8; ++j)
-                  dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
-               if (i) {
-                  instr->opcode = swapped_op;
-                  std::swap(instr->operands[0], instr->operands[1]);
-               }
-            } else {
-               DPP16_instruction* dpp = &instr->dpp16();
-               if (i) {
-                  instr->opcode = swapped_op;
-                  std::swap(instr->operands[0], instr->operands[1]);
-                  std::swap(dpp->neg[0], dpp->neg[1]);
-                  std::swap(dpp->abs[0], dpp->abs[1]);
-               }
-               dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
-               dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
-               dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
-               dpp->abs[0] |= info.instr->dpp16().abs[0];
+         if (i != 0 && !can_swap_operands(instr, &swapped_op))
+            continue;
+
+         if (instr->isDPP() || !can_use_DPP(instr, true, info.is_dpp8()))
+            continue;
+
+         bool dpp8 = info.is_dpp8();
+         bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
+                           instr_info.operand_size[(int)instr->opcode] == 32;
+         if (!dpp8 && (info.instr->dpp16().neg[0] || info.instr->dpp16().abs[0]) && !input_mods)
+            continue;
+
+         convert_to_DPP(instr, dpp8);
+         if (dpp8) {
+            DPP8_instruction* dpp = &instr->dpp8();
+            for (unsigned j = 0; j < 8; ++j)
+               dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
+            if (i) {
+               instr->opcode = swapped_op;
+               std::swap(instr->operands[0], instr->operands[1]);
             }
-            if (--ctx.uses[info.instr->definitions[0].tempId()])
-               ctx.uses[info.instr->operands[0].tempId()]++;
-            instr->operands[0].setTemp(info.instr->operands[0].getTemp());
-            break;
+         } else {
+            DPP16_instruction* dpp = &instr->dpp16();
+            if (i) {
+               instr->opcode = swapped_op;
+               std::swap(instr->operands[0], instr->operands[1]);
+               std::swap(dpp->neg[0], dpp->neg[1]);
+               std::swap(dpp->abs[0], dpp->abs[1]);
+            }
+            dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
+            dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
+            dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
+            dpp->abs[0] |= info.instr->dpp16().abs[0];
          }
+
+         if (--ctx.uses[info.instr->definitions[0].tempId()])
+            ctx.uses[info.instr->operands[0].tempId()]++;
+         instr->operands[0].setTemp(info.instr->operands[0].getTemp());
+         break;
       }
    }
 
index 91fc663..510b9e1 100644 (file)
@@ -511,6 +511,11 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (i && !can_swap_operands(instr, &instr->opcode))
          continue;
 
+      bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
+                        instr_info.operand_size[(int)instr->opcode] == 32;
+      if (!dpp8 && (mov->dpp16().neg[0] || mov->dpp16().abs[0]) && !input_mods)
+         continue;
+
       if (!dpp8) /* anything else doesn't make sense in SSA */
          assert(mov->dpp16().row_mask == 0xf && mov->dpp16().bank_mask == 0xf);
 
index e501fd0..043602d 100644 (file)
@@ -1064,6 +1064,22 @@ BEGIN_TEST(optimizer.dpp)
    res7->vop3().abs[0] = true;
    writeout(7, res7);
 
+   //! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
+   //! v1: %res11 = v_add_u32 %tmp11, %b
+   //! p_unit_test 11, %res11
+   auto tmp11 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   tmp11->dpp16().neg[0] = true;
+   Temp res11 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), tmp11, b);
+   writeout(11, res11);
+
+   //! v1: %tmp12 = v_mov_b32 -%a row_mirror bound_ctrl:1
+   //! v1: %res12 = v_add_f16 %tmp12, %b
+   //! p_unit_test 12, %res12
+   auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   tmp12->dpp16().neg[0] = true;
+   Temp res12 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1), tmp12, b);
+   writeout(12, res12);
+
    /* vcc */
    //! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
    //! p_unit_test 8, %res8
index c5f0a3b..066f74f 100644 (file)
@@ -409,6 +409,22 @@ BEGIN_TEST(optimizer_postRA.dpp)
    res7->vop3().abs[0] = true;
    writeout(7, Operand(res7, reg_v2));
 
+   //! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res12:v[2] = v_add_u32 %tmp12:v[2], %b:v[1]
+   //! p_unit_test 12, %res12:v[2]
+   auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   tmp12->dpp16().neg[0] = true;
+   Temp res12 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1, reg_v2), Operand(tmp12, reg_v2), b);
+   writeout(12, Operand(res12, reg_v2));
+
+   //! v1: %tmp13:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res13:v[2] = v_add_f16 %tmp13:v[2], %b:v[1]
+   //! p_unit_test 13, %res13:v[2]
+   auto tmp13 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   tmp13->dpp16().neg[0] = true;
+   Temp res13 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1, reg_v2), Operand(tmp13, reg_v2), b);
+   writeout(13, Operand(res13, reg_v2));
+
    /* vcc */
    //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
    //! p_unit_test 8, %res8:v[2]