aco: add can_use_DPP() and convert_to_DPP()

author Rhys Perry <pendingchaos02@gmail.com>

Wed, 14 Jul 2021 16:11:44 +0000 (17:11 +0100)

committer Marge Bot <eric+marge@anholt.net>

Thu, 19 Aug 2021 18:17:33 +0000 (18:17 +0000)
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 14 Jul 2021 16:11:44 +0000 (17:11 +0100)
committer Marge Bot <eric+marge@anholt.net>
Thu, 19 Aug 2021 18:17:33 +0000 (18:17 +0000)
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp

index 79f9d71..8900e2f 100644 (file)
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -24,6 +24,8 @@
  
  #include "aco_ir.h"
  
+#include "aco_builder.h"
+
  #include "util/debug.h"
  
  #include "c11/threads.h"
@@ -303,6 +305,78 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
  }
  
  bool
+can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra) /* true if instr is already DPP or passes every check below */
+{ /* NOTE(review): pre_ra presumably means "before register allocation" - confirm with callers */
+   assert(instr->isVALU() && !instr->operands.empty()); /* precondition: VALU instruction with at least one operand */
+
+   if (instr->isDPP())
+      return true; /* already in DPP form */
+
+   if (instr->operands.size() && instr->operands[0].isLiteral())
+      return false; /* a literal as the first operand is rejected */
+
+   if (instr->isSDWA())
+      return false; /* SDWA instructions are rejected */
+
+   if (!pre_ra && (instr->isVOPC() || instr->definitions.size() > 1) &&
+       instr->definitions.back().physReg() != vcc)
+      return false; /* when !pre_ra: the last definition of a VOPC or multi-definition instruction must be vcc */
+
+   if (!pre_ra && instr->operands.size() >= 3 && instr->operands[2].physReg() != vcc)
+      return false; /* when !pre_ra: a third operand, if present, must be vcc */
+
+   if (instr->isVOP3()) {
+      const VOP3_instruction* vop3 = &instr->vop3();
+      if (vop3->clamp || vop3->omod || vop3->opsel)
+         return false; /* any of clamp, omod or opsel being set is rejected */
+      if (instr->format == Format::VOP3)
+         return false; /* a format equal to Format::VOP3 alone is rejected */
+   }
+
+   /* there are more cases but those all take 64-bit inputs */
+   return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
+          instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
+          instr->opcode != aco_opcode::v_readfirstlane_b32 &&
+          instr->opcode != aco_opcode::v_cvt_f64_i32 &&
+          instr->opcode != aco_opcode::v_cvt_f64_f32 && instr->opcode != aco_opcode::v_cvt_f64_u32; /* the opcodes listed here never qualify */
+}
+
+aco_ptr<Instruction>
+convert_to_DPP(aco_ptr<Instruction>& instr) /* replaces instr with a DPP_instruction copy and returns the old instruction */
+{
+   if (instr->isDPP())
+      return NULL; /* nothing to convert: instr is left untouched */
+
+   aco_ptr<Instruction> tmp = std::move(instr); /* tmp now owns the original; instr is re-filled below */
+   Format format =
+      (Format)(((uint32_t)tmp->format & ~(uint32_t)Format::VOP3) | (uint32_t)Format::DPP); /* clear the VOP3 bit, set the DPP bit */
+   instr.reset(create_instruction<DPP_instruction>(tmp->opcode, format, tmp->operands.size(),
+                                                   tmp->definitions.size())); /* same opcode and operand/definition counts as the original */
+   std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); /* operands are copied, so tmp stays intact */
+   for (unsigned i = 0; i < instr->definitions.size(); i++)
+      instr->definitions[i] = tmp->definitions[i]; /* definitions are copied one by one */
+
+   DPP_instruction* dpp = &instr->dpp();
+   dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3); /* NOTE(review): presumably the identity quad permutation - confirm */
+   dpp->row_mask = 0xf; /* NOTE(review): presumably enables all rows - confirm */
+   dpp->bank_mask = 0xf; /* NOTE(review): presumably enables all banks - confirm */
+
+   if (tmp->isVOP3()) { /* carry the neg/abs modifiers over from the VOP3 original */
+      const VOP3_instruction* vop3 = &tmp->vop3();
+      memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg));
+      memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs));
+   }
+
+   if (instr->isVOPC() || instr->definitions.size() > 1)
+      instr->definitions.back().setFixed(vcc); /* last definition is fixed to vcc, matching the check in can_use_DPP() */
+
+   if (instr->operands.size() >= 3)
+      instr->operands[2].setFixed(vcc); /* third operand is fixed to vcc, matching the check in can_use_DPP() */
+
+   return tmp; /* hand the original instruction back to the caller */
+}
+
+bool
  can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high)
  {
     /* opsel is only GFX9+ */
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h

index c375b38..a739e5d 100644 (file)
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1730,8 +1730,10 @@ bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
  
  bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
  bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
+bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra); /* true if instr is already DPP or passes the conversion checks in aco_ir.cpp */
  /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
  aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
+aco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr); /* same contract as convert_to_SDWA, for the DPP format */
  bool needs_exec_mask(const Instruction* instr);
  
  uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
author	Rhys Perry <pendingchaos02@gmail.com>
	Wed, 14 Jul 2021 16:11:44 +0000 (17:11 +0100)
committer	Marge Bot <eric+marge@anholt.net>
	Thu, 19 Aug 2021 18:17:33 +0000 (18:17 +0000)
src/amd/compiler/aco_ir.cpp		patch \| blob \| history
src/amd/compiler/aco_ir.h		patch \| blob \| history