r600/sfn: Cleanup copy-prop into vec4 source values
authorGert Wollny <gert.wollny@collabora.com>
Wed, 7 Dec 2022 17:16:57 +0000 (18:16 +0100)
committerMarge Bot <emma+marge@anholt.net>
Fri, 9 Dec 2022 08:26:31 +0000 (08:26 +0000)
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20205>

src/gallium/drivers/r600/sfn/sfn_optimizer.cpp

index 6bf5aff..31b4306 100644 (file)
@@ -438,103 +438,143 @@ static bool register_chan_is_pinned(Pin pin)
 
 
 void
-CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr)
+CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
 {
+   /* Collect parent instructions - only ALU move without modifiers
+    * and without indirect access are allowed. */
    AluInstr *parents[4] = {nullptr};
+   bool have_candidates = false;
    for (int i = 0; i < 4; ++i) {
-      if (src[i]->chan() < 4 && src[i]->has_flag(Register::ssa)) {
+      if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) {
          /*  We have a pre-define value, so we can't propagate a copy */
-         if (src[i]->parents().empty())
+         if (value[i]->parents().empty())
             return;
 
-         assert(src[i]->parents().size() == 1);
-         parents[i] = (*src[i]->parents().begin())->as_alu();
+         assert(value[i]->parents().size() == 1);
+         parents[i] = (*value[i]->parents().begin())->as_alu();
+
+                       /* Parent op is not an ALU instruction, so we can't
+                               copy-propagate */
+                       if (!parents[i])
+                               return; 
+
+         if ((parents[i]->opcode() != op1_mov) ||
+             parents[i]->has_alu_flag(alu_src0_neg) ||
+             parents[i]->has_alu_flag(alu_src0_abs) ||
+             parents[i]->has_alu_flag(alu_dst_clamp) ||
+             parents[i]->has_alu_flag(alu_src0_rel) ||
+             std::get<0>(parents[i]->indirect_addr()))
+            return;
+         have_candidates = true;
       }
    }
+
+   if (!have_candidates)
+      return;
+
+   /* Collect the new source registers. We may have to move all registers
+    * to a new virtual sel index. */
+
    PRegister new_src[4] = {0};
+   int new_chan[4] = {0,0,0,0};
 
-   uint8_t mask = 0;
+   uint8_t used_chan_mask = 0;
    int new_sel = -1;
    bool all_sel_can_change = true;
 
    bool is_ssa = true;
-   int new_chan[4] = {0,0,0,0};
 
    for (int i = 0; i < 4; ++i) {
-      unsigned allowed_mask = 0xf & ~mask;
+
+      /* No parent means we either ignore the channel or insert 0 or 1.*/
       if (!parents[i])
          continue;
-      if ((parents[i]->opcode() != op1_mov) || parents[i]->has_alu_flag(alu_src0_neg) ||
-          parents[i]->has_alu_flag(alu_src0_abs) ||
-          parents[i]->has_alu_flag(alu_dst_clamp) ||
-          parents[i]->has_alu_flag(alu_src0_rel) ||
-          std::get<0>(parents[i]->indirect_addr())) {
+
+      unsigned allowed_mask = 0xf & ~used_chan_mask;
+
+      auto src = parents[i]->src(0).as_register();
+      if (!src)
          return;
-      } else {
-         auto src = parents[i]->src(0).as_register();
-         if (!src)
-            return;
-         if (src->pin() == pin_array)
-            return;
-         if (!src->has_flag(Register::ssa) &&
-             !assigned_register_direct(src)) {
-            return;
-         }
-         if (register_chan_is_pinned(src->pin())) {
-            allowed_mask = 1 << src->chan();
-         }
-         new_chan[i] = src->chan();
 
-         for (auto p : src->parents()) {
-            auto alu = p->as_alu();
-            if (alu)
-               allowed_mask &= alu->allowed_dest_chan_mask();
-         }
-         if (!allowed_mask) {
-            return;
-         }
+      /* Don't accept an array element for now, we would need extra checking
+       * that the value is not overwritten by an indirect access */
+      if (src->pin() == pin_array)
+         return;
 
-         if (new_sel < 0) {
-            new_sel = src->sel();
-            is_ssa = src->has_flag(Register::ssa);
-            new_chan[i] = src->chan();
-         } else if (new_sel != src->sel()) {
-            if (all_sel_can_change &&
-                register_sel_can_change(src->pin()) &&
-                (is_ssa == src->has_flag(Register::ssa))) {
-               new_chan[i] = u_bit_scan(&allowed_mask);
-               new_sel = value_factory.new_register_index();
-            } else
-               return;
-         }
+      /* Is this check still needed ? */
+      if (!src->has_flag(Register::ssa) &&
+          !assigned_register_direct(src)) {
+         return;
+      }
 
-         new_src[i] = src;
-         mask |= 1 << new_chan[i];
-         if (!register_sel_can_change(src->pin()))
-            all_sel_can_change = false;
+      /* If the channel chan't switch we have to update the channel mask
+       * TODO: assign channel pinned registers first might give more
+       *  opportunities for this optimization */
+      if (register_chan_is_pinned(src->pin()))
+         allowed_mask = 1 << src->chan();
+
+      /* Update the possible channel mask based on the sourcee's parent
+       * instruction(s) */
+      for (auto p : src->parents()) {
+         auto alu = p->as_alu();
+         if (alu)
+            allowed_mask &= alu->allowed_dest_chan_mask();
       }
+
+      if (!allowed_mask)
+         return;
+
+      /* Prefer keeping the channel, but if that's not possible
+       * i.e. if the sel has to change, then  pick the next free channel
+       * (see below) */
+      new_chan[i] = src->chan();
+
+      if (new_sel < 0) {
+         new_sel = src->sel();
+         is_ssa = src->has_flag(Register::ssa);
+      } else if (new_sel != src->sel()) {
+         /* If we have to assign a new register sel index do so only
+          * if all already assigned source can get a new register index,
+          * and all registers are either SSA or registers.
+          * TODO: check whether this last restriction is required */
+         if (all_sel_can_change &&
+             register_sel_can_change(src->pin()) &&
+             (is_ssa == src->has_flag(Register::ssa))) {
+            new_sel = value_factory.new_register_index();
+            new_chan[i] = u_bit_scan(&allowed_mask);
+         } else /* Sources can't be combined to a vec4 so bail out */
+            return;
+      }
+
+      new_src[i] = src;
+      used_chan_mask |= 1 << new_chan[i];
+      if (!register_sel_can_change(src->pin()))
+         all_sel_can_change = false;
    }
 
+   /* Apply the changes to the vec4 source */
+   value.del_use(instr);
    for (int i = 0; i < 4; ++i) {
       if (parents[i]) {
-         src.del_use(instr);
          new_src[i]->set_sel(new_sel);
          if (is_ssa)
             new_src[i]->set_flag(Register::ssa);
          new_src[i]->set_chan(new_chan[i]);
-         src.set_value(i, new_src[i]);
+
+         value.set_value(i, new_src[i]);
+
          if (new_src[i]->pin() != pin_fully) {
             if (new_src[i]->pin() == pin_chan)
                new_src[i]->set_pin(pin_chgr);
             else
                new_src[i]->set_pin(pin_group);
          }
-         src.add_use(instr);
          progress |= true;
       }
    }
+   value.add_use(instr);
    if (progress)
-      src.validate();
+      value.validate();
 }
 
 bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg)