aco/statistics: improve v_fma_mix dual issuing detection
author Georg Lehmann <dadschoorse@gmail.com>
Thu, 11 May 2023 15:42:12 +0000 (17:42 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 7 Jun 2023 12:30:11 +0000 (12:30 +0000)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21402>

src/amd/compiler/aco_statistics.cpp

index 3403760..9b0ee8a 100644 (file)
@@ -105,12 +105,12 @@ struct perf_info {
 };
 
 static bool
-is_dual_issue_capable(const Program& program, const Instruction& instruction)
+is_dual_issue_capable(const Program& program, const Instruction& instr)
 {
-   if (program.gfx_level < GFX11 || !instruction.isVALU() || instruction.isDPP())
+   if (program.gfx_level < GFX11 || !instr.isVALU() || instr.isDPP())
       return false;
 
-   switch (instruction.opcode) {
+   switch (instr.opcode) {
    case aco_opcode::v_fma_f32:
    case aco_opcode::v_fmac_f32:
    case aco_opcode::v_fmaak_f32:
@@ -122,9 +122,6 @@ is_dual_issue_capable(const Program& program, const Instruction& instruction)
    case aco_opcode::v_mul_legacy_f32:
    case aco_opcode::v_fma_legacy_f32:
    case aco_opcode::v_fmac_legacy_f32:
-   case aco_opcode::v_fma_mix_f32:
-   case aco_opcode::v_fma_mixlo_f16:
-   case aco_opcode::v_fma_mixhi_f16:
    case aco_opcode::v_fma_f16:
    case aco_opcode::v_fmac_f16:
    case aco_opcode::v_fmaak_f16:
@@ -169,6 +166,24 @@ is_dual_issue_capable(const Program& program, const Instruction& instruction)
    case aco_opcode::v_dot2_f16_f16:
    case aco_opcode::v_dot2_f32_f16:
    case aco_opcode::v_dot2c_f32_f16: return true;
+   case aco_opcode::v_fma_mix_f32:
+   case aco_opcode::v_fma_mixlo_f16:
+   case aco_opcode::v_fma_mixhi_f16: {
+      /* dst and acc type must match */
+      if (instr.valu().opsel_hi[2] == (instr.opcode == aco_opcode::v_fma_mix_f32))
+         return false;
+
+      /* If all operands are vgprs, two must be the same. */
+      for (unsigned i = 0; i < 3; i++) {
+         if (instr.operands[i].isConstant() || instr.operands[i].isOfType(RegType::sgpr))
+            return true;
+         for (unsigned j = 0; j < i; j++) {
+            if (instr.operands[i].physReg() == instr.operands[j].physReg())
+               return true;
+         }
+      }
+      return false;
+   }
    default: return false;
    }
 }