freedreno/ir3: consider instruction neighbors in cp
author: Rob Clark <robclark@freedesktop.org>
Fri, 24 Oct 2014 13:27:37 +0000 (09:27 -0400)
committer: Rob Clark <robclark@freedesktop.org>
Sat, 25 Oct 2014 16:07:43 +0000 (12:07 -0400)
Fanin (merge) nodes require their srcs to be "adjacent" in consecutive
scalar registers.  Keep track of instruction neighbors in the copy-
propagation step and avoid eliminating mov's which would cause an
instruction to need multiple distinct left and/or right neighbors.

This lets us not fall on our face when we encounter things like:

  1: MOV TEMP[2], IN[0].xyzw
  2: TEX OUT[0].xy, TEMP[2], SAMP[0], SHADOW2D
  3: MOV TEMP[2].xy, IN[0].yxzz
  4: TEX OUT[0].zw, TEMP[2], SAMP[0], SHADOW2D
  5: END

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_cp.c

index 20d97ba..8a5e9fd 100644 (file)
@@ -216,6 +216,19 @@ struct ir3_instruction {
                 */
 #define DEPTH_UNUSED  ~0
                unsigned depth;
+
+               /* Used just during cp stage, which comes before depth pass.
+                * For fanin, where we need a sequence of consecutive registers,
+                * keep track of each src instruction's left (ie 'n-1') and right
+                * (ie 'n+1') neighbor.  The front-end must insert enough mov's
+                * to ensure that each instruction has at most one left and at
+                * most one right neighbor.  During the copy-propagation pass,
+                * we only remove mov's when we can preserve this constraint.
+                *
+                * left_cnt/right_cnt count how many times the corresponding
+                * neighbor has been recorded; ir3_cp.c clears the pointer
+                * again once its count drops back to zero.
+                */
+               struct {
+                       struct ir3_instruction *left, *right;
+                       uint16_t left_cnt, right_cnt;
+               } cp;
        };
        struct ir3_instruction *next;
 #ifdef DEBUG
index 83bcb7a..2076b62 100644 (file)
  *    Rob Clark <robclark@freedesktop.org>
  */
 
+#include "freedreno_util.h"
+
 #include "ir3.h"
 
 /*
  * Copy Propagate:
  *
- * TODO probably want some sort of visitor sort of interface to
- * avoid duplicating the same graph traversal logic everywhere..
- *
  */
 
 static void block_cp(struct ir3_block *block);
 static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep);
 
+/* XXX move this somewhere useful (and rename?)
+ *
+ * Returns reg->instr when the register carries the IR3_REG_SSA flag,
+ * and NULL for any register without that flag.
+ */
+static struct ir3_instruction *ssa(struct ir3_register *reg)
+{
+       if (reg->flags & IR3_REG_SSA)
+               return reg->instr;
+       return NULL;
+}
+/*
+ * Returns true only when both pointers are non-NULL and refer to
+ * different instructions; a NULL on either side never conflicts.
+ */
+static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
+{
+       return (a && b) && (a != b);
+}
+/*
+ * Record 'left' and/or 'right' as the neighbors of 'instr'.  Each
+ * non-NULL argument increments the matching reference count and
+ * overwrites the stored pointer; a NULL argument leaves that side
+ * untouched.  The asserts check that a new non-NULL neighbor does not
+ * differ from one that is already recorded (debug_assert presumably
+ * compiles away outside DEBUG builds -- TODO confirm).
+ */
+static void set_neighbors(struct ir3_instruction *instr,
+               struct ir3_instruction *left, struct ir3_instruction *right)
+{
+       debug_assert(!conflicts(instr->cp.left, left));
+       if (left) {
+               instr->cp.left_cnt++;
+               instr->cp.left = left;
+       }
+       debug_assert(!conflicts(instr->cp.right, right));
+       if (right) {
+               instr->cp.right_cnt++;
+               instr->cp.right = right;
+       }
+}
+
+/* remove neighbor reference, clearing left/right neighbor ptrs when
+ * there are no more references:
+ *
+ * A side is only touched while its pointer is set: its count is
+ * decremented, and the pointer is reset to NULL once the count
+ * reaches zero.
+ */
+static void remove_neighbors(struct ir3_instruction *instr)
+{
+       if (instr->cp.left) {
+               if (--instr->cp.left_cnt == 0)
+                       instr->cp.left = NULL;
+       }
+       if (instr->cp.right) {
+               if (--instr->cp.right_cnt == 0)
+                       instr->cp.right = NULL;
+       }
+}
+
+/* stop condition for iteration:
+ *
+ * Returns true when ir3_instr_check_mark() reports true for the
+ * instruction (the callers treat that as "already visited"), or when
+ * the instruction is an OPC_META_INPUT meta instruction; otherwise
+ * returns false.  The mark check runs first, so it is also applied
+ * to OPC_META_INPUT instructions.
+ */
+static bool check_stop(struct ir3_instruction *instr)
+{
+       if (ir3_instr_check_mark(instr))
+               return true;
+
+       /* stay within the block.. don't try to operate across
+        * basic block boundaries or we'll have problems when
+        * dealing with multiple basic blocks:
+        */
+       if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
+               return true;
+
+       return false;
+}
+/*
+ * Returns true when 'instr' may be dropped by copy-propagation.  With
+ * this change applied, all of the following must hold:
+ *   - it is a category 1 instruction whose cat1 src and dst types match
+ *   - the destination register is not flagged IR3_REG_ADDR
+ *   - the source register has none of ABS / NEGATE / RELATIV set
+ *   - the source register is SSA (ssa() returns an instruction)
+ *   - the left/right neighbors recorded on 'instr' do not conflict
+ *     with those already recorded on the source instruction
+ */
 static bool is_eligible_mov(struct ir3_instruction *instr)
 {
        if ((instr->category == 1) &&
                        (instr->cat1.src_type == instr->cat1.dst_type)) {
                struct ir3_register *dst = instr->regs[0];
                struct ir3_register *src = instr->regs[1];
+               struct ir3_instruction *src_instr = ssa(src);
                if (dst->flags & IR3_REG_ADDR)
                        return false;
-               if ((src->flags & IR3_REG_SSA) &&
-                               /* TODO: propagate abs/neg modifiers if possible */
-                               !(src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV)))
+               /* TODO: propagate abs/neg modifiers if possible */
+               if (src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV))
+                       return false;
+               if (src_instr) {
+                       /* check that eliminating the move won't result in
+                        * a neighbor conflict, ie. if an instruction feeds
+                        * into multiple fanins it can still only have at
+                        * most one left and one right neighbor:
+                        *
+                        * (a side with no neighbor recorded on either
+                        * instruction can never conflict)
+                        */
+                       if (conflicts(instr->cp.left, src_instr->cp.left))
+                               return false;
+                       if (conflicts(instr->cp.right, src_instr->cp.right))
+                               return false;
                        return true;
+               }
        }
        return false;
 }
@@ -95,6 +165,9 @@ instr_cp_fanin(struct ir3_instruction *instr)
                        /* we can't have 2 registers referring to the same instruction, so
                         * go through and check if any already refer to the candidate
                         * instruction. if so, don't do the propagation.
+                        *
+                        * NOTE: we need to keep this, despite the neighbor
+                        * conflict checks, to avoid A<->B<->A..
                         */
                        for (j = 1; j < instr->regs_count; j++)
                                if (instr->regs[j]->instr == cand)
@@ -107,22 +180,23 @@ instr_cp_fanin(struct ir3_instruction *instr)
        walk_children(instr, false);
 
        return instr;
-
 }
 
 static struct ir3_instruction *
 instr_cp(struct ir3_instruction *instr, bool keep)
 {
        /* if we've already visited this instruction, bail now: */
-       if (ir3_instr_check_mark(instr))
+       if (check_stop(instr))
                return instr;
 
        if (is_meta(instr) && (instr->opc == OPC_META_FI))
                return instr_cp_fanin(instr);
 
-       if (is_eligible_mov(instr) && !keep) {
-               struct ir3_register *src = instr->regs[1];
-               return instr_cp(src->instr, false);
+       if (!keep && is_eligible_mov(instr)) {
+               struct ir3_instruction *src_instr = ssa(instr->regs[1]);
+               set_neighbors(src_instr, instr->cp.left, instr->cp.right);
+               remove_neighbors(instr);
+               return instr_cp(src_instr, false);
        }
 
        walk_children(instr, false);
@@ -159,8 +233,88 @@ static void block_cp(struct ir3_block *block)
        }
 }
 
+/*
+ * Find instruction neighbors:
+ *
+ * instr_find_neighbors() recurses through the SSA sources of 'instr'
+ * until check_stop() reports true.  For an OPC_META_FI instruction,
+ * each SSA source in regs[1..n-1] is given the SSA instructions of
+ * the adjacent source slots as its left/right neighbors (NULL at
+ * either end, or when the adjacent slot is not SSA) before the walk
+ * descends into that source.  Any other instruction is only
+ * traversed; no neighbors are recorded for its sources.
+ */
+
+static void instr_find_neighbors(struct ir3_instruction *instr)
+{
+       unsigned i;
+
+       if (check_stop(instr))
+               return;
+
+       if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
+               unsigned n = instr->regs_count;
+               for (i = 1; i < n; i++) {
+                       struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+                       if (src_instr) {
+                               struct ir3_instruction *left = (i > 1) ?
+                                               ssa(instr->regs[i-1]) : NULL;
+                               struct ir3_instruction *right = (i < (n - 1)) ?
+                                               ssa(instr->regs[i+1]) : NULL;
+                               set_neighbors(src_instr, left, right);
+                               instr_find_neighbors(src_instr);
+                       }
+               }
+       } else {
+               for (i = 1; i < instr->regs_count; i++) {
+                       struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+                       if (src_instr)
+                               instr_find_neighbors(src_instr);
+               }
+       }
+}
+/*
+ * Run instr_find_neighbors() on every non-NULL entry of
+ * block->outputs.
+ */
+static void block_find_neighbors(struct ir3_block *block)
+{
+       unsigned i;
+
+       for (i = 0; i < block->noutputs; i++) {
+               if (block->outputs[i]) {
+                       struct ir3_instruction *instr = block->outputs[i];
+                       instr_find_neighbors(instr);
+               }
+       }
+}
+/*
+ * Reset the cp neighbor pointers and counts of 'instr' and then,
+ * recursively, of each of its SSA sources.  The walk stops wherever
+ * check_stop() reports true, and such instructions are left as-is.
+ *
+ * NOTE(review): check_stop() also returns true for OPC_META_INPUT,
+ * so those instructions are never reset here, whereas
+ * instr_find_neighbors() calls set_neighbors() on a fanin source
+ * before the stop check is applied to it.  Confirm that such
+ * instructions always start out with zeroed cp fields.
+ */
+static void instr_clear_neighbors(struct ir3_instruction *instr)
+{
+       unsigned i;
+
+       if (check_stop(instr))
+               return;
+
+       instr->cp.left_cnt = 0;
+       instr->cp.left = NULL;
+       instr->cp.right_cnt = 0;
+       instr->cp.right = NULL;
+
+       for (i = 1; i < instr->regs_count; i++) {
+               struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+               if (src_instr)
+                       instr_clear_neighbors(src_instr);
+       }
+}
+/*
+ * Run instr_clear_neighbors() on every non-NULL entry of
+ * block->outputs.
+ */
+static void block_clear_neighbors(struct ir3_block *block)
+{
+       unsigned i;
+
+       for (i = 0; i < block->noutputs; i++) {
+               if (block->outputs[i]) {
+                       struct ir3_instruction *instr = block->outputs[i];
+                       instr_clear_neighbors(instr);
+               }
+       }
+}
+/*
+ * Copy-propagation entry point for one block.  With this change it
+ * runs three walks in order: reset the cp neighbor fields, record
+ * fanin neighbors, then block_cp().  ir3_clear_mark() is called on
+ * the shader before each walk; each walk goes through
+ * ir3_instr_check_mark() via check_stop(), which presumably leaves
+ * visited instructions marked (TODO confirm against ir3.h).
+ */
 void ir3_block_cp(struct ir3_block *block)
 {
        ir3_clear_mark(block->shader);
+       block_clear_neighbors(block);
+       ir3_clear_mark(block->shader);
+       block_find_neighbors(block);
+       ir3_clear_mark(block->shader);
        block_cp(block);
 }