lima/gpir: Rewrite register allocation for value registers

author Connor Abbott <cwabbott0@gmail.com>

Wed, 9 Oct 2019 13:36:56 +0000 (09:36 -0400)

committer Marge Bot <eric+marge@anholt.net>

Wed, 1 Sep 2021 08:30:57 +0000 (08:30 +0000)
author Connor Abbott <cwabbott0@gmail.com>
Wed, 9 Oct 2019 13:36:56 +0000 (09:36 -0400)
committer Marge Bot <eric+marge@anholt.net>
Wed, 1 Sep 2021 08:30:57 +0000 (08:30 +0000)
diff --git a/src/gallium/drivers/lima/ci/deqp-lima-fails.txt b/src/gallium/drivers/lima/ci/deqp-lima-fails.txt

index 7bc535e..680b8f2 100644 (file)
--- a/src/gallium/drivers/lima/ci/deqp-lima-fails.txt
+++ b/src/gallium/drivers/lima/ci/deqp-lima-fails.txt
@@ -37,14 +37,6 @@ dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_
  dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_dynamic_read_vertex,Fail
  dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_static_loop_read_vertex,Fail
  dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_vertex,Fail
  dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_dynamic_loop_read_vertex,Fail
  dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_dynamic_read_vertex,Fail
  dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_static_loop_read_vertex,Fail
diff --git a/src/gallium/drivers/lima/ir/gp/regalloc.c b/src/gallium/drivers/lima/ir/gp/regalloc.c

index de519b2..8526d1e 100644 (file)
--- a/src/gallium/drivers/lima/ir/gp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/gp/regalloc.c
@@ -31,14 +31,7 @@ struct reg_info {
     BITSET_WORD *conflicts;
     struct util_dynarray conflict_list;
  
-   /* Number of conflicts that must be allocated to physical registers.
-    */
-   unsigned phys_conflicts;
-
-   unsigned node_conflicts;
-
-   /* Number of conflicts that can be allocated to either. */
-   unsigned total_conflicts;
+   unsigned num_conflicts;
  
     int assigned_color;
  
@@ -46,7 +39,7 @@ struct reg_info {
  };
  
  struct regalloc_ctx {
-   unsigned bitset_words, num_nodes_and_regs;
+   unsigned bitset_words;
     struct reg_info *registers;
  
     /* Reusable scratch liveness array */
@@ -64,8 +57,8 @@ struct regalloc_ctx {
  
  /* Liveness analysis */
  
-static void propagate_liveness_instr(gpir_node *node, BITSET_WORD *live,
-                                     gpir_compiler *comp)
+static void propagate_liveness_node(gpir_node *node, BITSET_WORD *live,
+                                    gpir_compiler *comp)
  {
     /* KILL */
     if (node->type == gpir_node_type_store) {
@@ -96,7 +89,7 @@ static bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx
     memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD));
  
     list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
-      propagate_liveness_instr(node, ctx->live, block->comp);
+      propagate_liveness_node(node, ctx->live, block->comp);
     }
  
     bool changed = false;
@@ -166,18 +159,8 @@ static void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)
     BITSET_SET(a->conflicts, j);
     BITSET_SET(b->conflicts, i);
  
-   a->total_conflicts++;
-   b->total_conflicts++;
-   if (j < ctx->comp->cur_reg)
-      a->phys_conflicts++;
-   else
-      a->node_conflicts++;
-
-   if (i < ctx->comp->cur_reg)
-      b->phys_conflicts++;
-   else
-      b->node_conflicts++;
-
+   a->num_conflicts++;
+   b->num_conflicts++;
     util_dynarray_append(&a->conflict_list, unsigned, j);
     util_dynarray_append(&b->conflict_list, unsigned, i);
  }
@@ -187,24 +170,17 @@ static void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)
   */
  static void add_all_interferences(struct regalloc_ctx *ctx,
                                    unsigned i,
-                                  BITSET_WORD *live_nodes,
                                    BITSET_WORD *live_regs)
  {
-   int live_node;
-   BITSET_FOREACH_SET(live_node, live_nodes, ctx->comp->cur_index) {
-      add_interference(ctx, i,
-                       live_node + ctx->comp->cur_reg);
-   }
-
     int live_reg;
-   BITSET_FOREACH_SET(live_reg, ctx->live, ctx->comp->cur_index) {
+   BITSET_FOREACH_SET(live_reg, ctx->live, ctx->comp->cur_reg) {
        add_interference(ctx, i, live_reg);
     }
  
  }
  
  static void print_liveness(struct regalloc_ctx *ctx,
-                           BITSET_WORD *live_reg, BITSET_WORD *live_val)
+                           BITSET_WORD *live_reg)
  {
     if (!(lima_debug & LIMA_DEBUG_GP))
        return;
@@ -213,17 +189,11 @@ static void print_liveness(struct regalloc_ctx *ctx,
     BITSET_FOREACH_SET(live_idx, live_reg, ctx->comp->cur_reg) {
        printf("reg%d ", live_idx);
     }
-   BITSET_FOREACH_SET(live_idx, live_val, ctx->comp->cur_index) {
-      printf("%d ", live_idx);
-   }
     printf("\n");
  }
  
  static void calc_interference(struct regalloc_ctx *ctx)
  {
-   BITSET_WORD *live_nodes =
-      rzalloc_array(ctx->mem_ctx, BITSET_WORD, ctx->comp->cur_index);
-
     list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
        /* Initialize liveness at the end of the block, but exclude values that
         * definitely aren't defined by the end. This helps out with
@@ -247,35 +217,15 @@ static void calc_interference(struct regalloc_ctx *ctx)
  
        list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
           gpir_debug("processing node %d\n", node->index);
-         print_liveness(ctx, ctx->live, live_nodes);
-         if (node->type != gpir_node_type_store &&
-             node->type != gpir_node_type_branch) {
-            add_all_interferences(ctx, node->index + ctx->comp->cur_reg,
-                                  live_nodes, ctx->live);
-
-            /* KILL */
-            BITSET_CLEAR(live_nodes, node->index);
-         } else if (node->op == gpir_op_store_reg) {
+         print_liveness(ctx, ctx->live);
+         if (node->op == gpir_op_store_reg) {
              gpir_store_node *store = gpir_node_to_store(node);
-            add_all_interferences(ctx, store->reg->index,
-                                  live_nodes, ctx->live);
+            add_all_interferences(ctx, store->reg->index, ctx->live);
  
              /* KILL */
              BITSET_CLEAR(ctx->live, store->reg->index);
-         }
-
-         /* GEN */
-         if (node->type == gpir_node_type_store) {
-            gpir_store_node *store = gpir_node_to_store(node);
-            BITSET_SET(live_nodes, store->child->index);
-         } else if (node->type == gpir_node_type_alu) {
-            gpir_alu_node *alu = gpir_node_to_alu(node);
-            for (int i = 0; i < alu->num_child; i++)
-               BITSET_SET(live_nodes, alu->children[i]->index);
-         } else if (node->type == gpir_node_type_branch) {
-            gpir_branch_node *branch = gpir_node_to_branch(node);
-            BITSET_SET(live_nodes, branch->cond->index);
           } else if (node->op == gpir_op_load_reg) {
+            /* GEN */
              gpir_load_node *load = gpir_node_to_load(node);
              BITSET_SET(ctx->live, load->reg->index);
           }
@@ -288,39 +238,21 @@ static void calc_interference(struct regalloc_ctx *ctx)
  static bool can_simplify(struct regalloc_ctx *ctx, unsigned i)
  {
     struct reg_info *info = &ctx->registers[i];
-   if (i < ctx->comp->cur_reg) {
-      /* Physical regs. */
-      return info->phys_conflicts + info->node_conflicts < GPIR_PHYSICAL_REG_NUM;
-   } else {
-      /* Nodes: if we manage to allocate all of its conflicting physical
-       * registers, they will take up at most GPIR_PHYSICAL_REG_NUM colors, so
-       * we can ignore any more than that.
-       */
-      return MIN2(info->phys_conflicts, GPIR_PHYSICAL_REG_NUM) + 
-         info->node_conflicts < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;
-   }
+   return info->num_conflicts < GPIR_PHYSICAL_REG_NUM;
  }
  
  static void push_stack(struct regalloc_ctx *ctx, unsigned i)
  {
     ctx->stack[ctx->stack_size++] = i;
-   if (i < ctx->comp->cur_reg)
-      gpir_debug("pushing reg%u\n", i);
-   else
-      gpir_debug("pushing %d\n", i - ctx->comp->cur_reg);
+   gpir_debug("pushing reg%u\n", i);
  
     struct reg_info *info = &ctx->registers[i];
     assert(info->visited);
  
     util_dynarray_foreach(&info->conflict_list, unsigned, conflict) {
        struct reg_info *conflict_info = &ctx->registers[*conflict];
-      if (i < ctx->comp->cur_reg) {
-         assert(conflict_info->phys_conflicts > 0);
-         conflict_info->phys_conflicts--;
-      } else {
-         assert(conflict_info->node_conflicts > 0);
-         conflict_info->node_conflicts--;
-      }
+      assert(conflict_info->num_conflicts > 0);
+      conflict_info->num_conflicts--;
        if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) {
           ctx->worklist[ctx->worklist_end++] = *conflict;
           ctx->registers[*conflict].visited = true;
@@ -335,7 +267,7 @@ static bool do_regalloc(struct regalloc_ctx *ctx)
     ctx->stack_size = 0;
  
     /* Step 1: find the initially simplifiable registers */
-   for (int i = 0; i < ctx->comp->cur_reg + ctx->comp->cur_index; i++) {
+   for (int i = 0; i < ctx->comp->cur_reg; i++) {
        if (can_simplify(ctx, i)) {
           ctx->worklist[ctx->worklist_end++] = i;
           ctx->registers[i].visited = true;
@@ -348,7 +280,7 @@ static bool do_regalloc(struct regalloc_ctx *ctx)
           push_stack(ctx, ctx->worklist[ctx->worklist_start++]);
        }
  
-      if (ctx->stack_size < ctx->num_nodes_and_regs) {
+      if (ctx->stack_size < ctx->comp->cur_reg) {
           /* If there are still unsimplifiable nodes left, we need to
            * optimistically push a node onto the stack.  Choose the one with
            * the smallest number of current neighbors, since that's the most
@@ -356,13 +288,13 @@ static bool do_regalloc(struct regalloc_ctx *ctx)
            */
           unsigned min_conflicts = UINT_MAX;
           unsigned best_reg = 0;
-         for (unsigned reg = 0; reg < ctx->num_nodes_and_regs; reg++) {
+         for (int reg = 0; reg < ctx->comp->cur_reg; reg++) {
              struct reg_info *info = &ctx->registers[reg];
              if (info->visited)
                 continue;
-            if (info->phys_conflicts + info->node_conflicts < min_conflicts) {
+            if (info->num_conflicts < min_conflicts) {
                 best_reg = reg;
-               min_conflicts = info->phys_conflicts + info->node_conflicts;
+               min_conflicts = info->num_conflicts;
              }
           }
           gpir_debug("optimistic triggered\n");
@@ -374,21 +306,14 @@ static bool do_regalloc(struct regalloc_ctx *ctx)
     }
  
     /* Step 4: pop off the stack and assign colors */
-   for (int i = ctx->num_nodes_and_regs - 1; i >= 0; i--) {
+   for (int i = ctx->comp->cur_reg - 1; i >= 0; i--) {
        unsigned idx = ctx->stack[i];
        struct reg_info *reg = &ctx->registers[idx];
  
-      unsigned num_available_regs;
-      if (idx < ctx->comp->cur_reg) {
-         num_available_regs = GPIR_PHYSICAL_REG_NUM;
-      } else {
-         num_available_regs = GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM;
-      }
-
        bool found = false;
-      unsigned start = i % num_available_regs;
-      for (unsigned j = 0; j < num_available_regs; j++) {
-         unsigned candidate = (j + start) % num_available_regs;
+      unsigned start = i % GPIR_PHYSICAL_REG_NUM;
+      for (unsigned j = 0; j < GPIR_PHYSICAL_REG_NUM; j++) {
+         unsigned candidate = (j + start) % GPIR_PHYSICAL_REG_NUM;
           bool available = true;
           util_dynarray_foreach(&reg->conflict_list, unsigned, conflict_idx) {
              struct reg_info *conflict = &ctx->registers[*conflict_idx];
@@ -420,11 +345,6 @@ static void assign_regs(struct regalloc_ctx *ctx)
  {
     list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
        list_for_each_entry(gpir_node, node, &block->node_list, list) {
-         if (node->index >= 0) {
-            node->value_reg =
-               ctx->registers[ctx->comp->cur_reg + node->index].assigned_color;
-         }
-
           if (node->op == gpir_op_load_reg) {
              gpir_load_node *load = gpir_node_to_load(node);
              unsigned color = ctx->registers[load->reg->index].assigned_color;
@@ -452,6 +372,195 @@ static void assign_regs(struct regalloc_ctx *ctx)
     }
  }
  
+/* Value register allocation */
+
+/* Define a special token for when the register is occupied by a preallocated
+ * physical register (i.e. load_reg/store_reg). Normally entries in the "live"
+ * array points to the definition of the value, but there may be multiple
+ * definitions in this case, and they will certainly come from other basic
+ * blocks, so it doesn't make sense to do that here.
+ */
+static gpir_node __physreg_live;
+#define PHYSREG_LIVE (&__physreg_live)
+
+struct value_regalloc_ctx {
+   gpir_node *last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM];
+   gpir_node *complex1_last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM];
+   gpir_node *live[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM];
+   gpir_node *last_complex1;
+   unsigned alloc_start;
+};
+
+static unsigned find_free_value_reg(struct value_regalloc_ctx *ctx)
+{
+   /* Implement round-robin allocation */
+   unsigned reg_offset = ctx->alloc_start++;
+   if (ctx->alloc_start == GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM)
+      ctx->alloc_start = 0;
+
+   unsigned reg = UINT_MAX;
+   for (unsigned reg_base = 0;
+        reg_base < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;
+        reg_base++) {
+      unsigned cur_reg = (reg_base + reg_offset) % (GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM);
+      if (!ctx->live[cur_reg]) {
+         reg = cur_reg;
+         break;
+      }
+   }
+
+   return reg;
+}
+
+static void add_fake_dep(gpir_node *node, gpir_node *src,
+                         struct value_regalloc_ctx *ctx)
+{
+   assert(src->value_reg >= 0);
+   if (ctx->last_written[src->value_reg] &&
+       ctx->last_written[src->value_reg] != node) {
+      gpir_node_add_dep(ctx->last_written[src->value_reg], node,
+                        GPIR_DEP_WRITE_AFTER_READ);
+   }
+
+   /* For a sequence of schedule_first nodes right before a complex1
+    * node, add any extra fake dependencies necessary so that the
+    * schedule_first nodes can be scheduled right after the complex1 is
+    * scheduled. We have to save the last_written before complex1 happens to
+    * avoid adding dependencies to children of the complex1 node which would
+    * create a cycle.
+    */
+
+   if (gpir_op_infos[node->op].schedule_first &&
+       ctx->last_complex1 &&
+       ctx->complex1_last_written[src->value_reg]) {
+      gpir_node_add_dep(ctx->complex1_last_written[src->value_reg],
+                        ctx->last_complex1,
+                        GPIR_DEP_WRITE_AFTER_READ);
+   }
+}
+
+static bool handle_value_read(gpir_node *node, gpir_node *src,
+                              struct value_regalloc_ctx *ctx)
+{
+   /* If already allocated, don't allocate it */
+   if (src->value_reg < 0) {
+      unsigned reg = find_free_value_reg(ctx);
+      if (reg == UINT_MAX)
+         return false;
+
+      src->value_reg = reg;
+      ctx->live[reg] = src;
+   }
+
+   /* Add any fake dependencies. Note: this is the actual result of value
+    * register allocation. We throw away node->value_reg afterwards, since
+    * it's really the fake dependencies which constrain the post-RA scheduler
+    * enough to make sure it never needs to spill to temporaries.
+    */
+   add_fake_dep(node, src, ctx);
+
+   return true;
+}
+
+static bool handle_reg_read(gpir_load_node *load,
+                            struct value_regalloc_ctx *ctx)
+{
+   unsigned idx = load->index * 4 + load->component;
+   if (!ctx->live[idx]) {
+      ctx->live[idx] = PHYSREG_LIVE;
+   } else if (ctx->live[idx] != PHYSREG_LIVE) {
+      /* This slot is occupied by some other value register, so we need to
+       * evict it. This effectively splits the live range of the value
+       * register. NB: since we create fake dependencies on the fly, and the
+       * fake dependencies are the only output of this pass, we don't actually
+       * have to record where the split happened or that this value was
+       * assigned to two different registers. Any actual live range splitting
+       * happens in the post-RA scheduler, which moves the value to and from
+       * the register file. This will just cause some reads of the value
+       * register to have different fake dependencies.
+       */
+      unsigned new_reg = find_free_value_reg(ctx);
+      if (new_reg == UINT_MAX)
+         return false;
+      ctx->live[new_reg] = ctx->live[idx];
+      ctx->live[new_reg]->value_reg = new_reg;
+      ctx->live[idx] = PHYSREG_LIVE;
+   }
+
+   if (ctx->last_written[idx]) {
+      gpir_node_add_dep(ctx->last_written[idx], &load->node,
+                        GPIR_DEP_WRITE_AFTER_READ);
+   }
+
+   return true;
+}
+
+static void handle_reg_write(gpir_store_node *store,
+                             struct value_regalloc_ctx *ctx)
+{
+   unsigned idx = store->index * 4 + store->component;
+   store->node.value_reg = idx;
+   ctx->last_written[idx] = &store->node;
+   ctx->live[idx] = NULL;
+}
+
+static void handle_value_write(gpir_node *node,
+                               struct value_regalloc_ctx *ctx)
+{
+   ctx->last_written[node->value_reg] = node;
+   ctx->live[node->value_reg] = NULL;
+}
+
+static bool regalloc_value_regs(gpir_block *block)
+{
+   struct value_regalloc_ctx ctx = { { 0 } };
+
+   list_for_each_entry(gpir_node, node, &block->node_list, list) {
+      node->value_reg = -1;
+   }
+
+   list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
+      if (node->op == gpir_op_complex1) {
+         ctx.last_complex1 = node;
+         memcpy(ctx.complex1_last_written, ctx.last_written,
+                sizeof(ctx.complex1_last_written));
+      }
+
+      if (node->type != gpir_node_type_store &&
+          node->type != gpir_node_type_branch) {
+         handle_value_write(node, &ctx);
+      } else if (node->op == gpir_op_store_reg) {
+         handle_reg_write(gpir_node_to_store(node), &ctx);
+      }
+
+      if (node->type == gpir_node_type_store) {
+         gpir_store_node *store = gpir_node_to_store(node);
+         if (!handle_value_read(&store->node, store->child, &ctx))
+            return false;
+      } else if (node->type == gpir_node_type_alu) {
+         gpir_alu_node *alu = gpir_node_to_alu(node);
+         for (int i = 0; i < alu->num_child; i++) {
+            if (!handle_value_read(&alu->node, alu->children[i], &ctx))
+               return false;
+         }
+      } else if (node->type == gpir_node_type_branch) {
+         /* At the end of a block the top 11 values are always free, so
+          * branches should always succeed.
+          */
+         gpir_branch_node *branch = gpir_node_to_branch(node);
+         ASSERTED bool result = handle_value_read(&branch->node,
+                                                  branch->cond, &ctx);
+         assert(result);
+      } else if (node->op == gpir_op_load_reg) {
+         gpir_load_node *load = gpir_node_to_load(node);
+         if (!handle_reg_read(load, &ctx))
+             return false;
+      }
+   }
+
+   return true;
+}
+
  static void regalloc_print_result(gpir_compiler *comp)
  {
     if (!(lima_debug & LIMA_DEBUG_GP))
@@ -486,15 +595,14 @@ bool gpir_regalloc_prog(gpir_compiler *comp)
     struct regalloc_ctx ctx;
  
     ctx.mem_ctx = ralloc_context(NULL);
-   ctx.num_nodes_and_regs = comp->cur_reg + comp->cur_index;
-   ctx.bitset_words = BITSET_WORDS(ctx.num_nodes_and_regs);
+   ctx.bitset_words = BITSET_WORDS(comp->cur_reg);
     ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
-   ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
-   ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
+   ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, comp->cur_reg);
+   ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, comp->cur_reg);
     ctx.comp = comp;
  
-   ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, ctx.num_nodes_and_regs);
-   for (unsigned i = 0; i < ctx.num_nodes_and_regs; i++) {
+   ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, comp->cur_reg);
+   for (int i = 0; i < comp->cur_reg; i++) {
        ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD,
                                                   ctx.bitset_words);
        util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx);
@@ -514,6 +622,11 @@ bool gpir_regalloc_prog(gpir_compiler *comp)
     }
     assign_regs(&ctx);
  
+   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+      if (!regalloc_value_regs(block))
+         return false;
+   }
+
     regalloc_print_result(comp);
     ralloc_free(ctx.mem_ctx);
     return true;
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c

index 90a830f..78128bd 100644 (file)
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -1612,33 +1612,8 @@ static bool schedule_block(gpir_block *block)
     return true;
  }
  
-static void add_fake_dep(gpir_node *node, gpir_node *dep_node,
-                         gpir_node *last_written[])
-{
-      gpir_node_foreach_pred(node, dep) {
-         if (dep->type == GPIR_DEP_INPUT) {
-            int index = dep->pred->value_reg;
-            if (index >= 0 && last_written[index]) {
-               gpir_node_add_dep(last_written[index], dep_node,
-                                 GPIR_DEP_WRITE_AFTER_READ);
-            }
-            if (gpir_op_infos[dep->pred->op].schedule_first) {
-               /* Insert fake dependencies for any schedule_first children on
-                * this node as well. This guarantees that as soon as
-                * "dep_node" is ready to schedule, all of its schedule_first
-                * children, grandchildren, etc. are ready so that they can be
-                * scheduled as soon as possible.
-                */
-               add_fake_dep(dep->pred, dep_node, last_written);
-            }
-         }
-      }
-}
-
  static void schedule_build_dependency(gpir_block *block)
  {
-   gpir_node *last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM] = {0};
-
     /* merge dummy_f/m to the node created from */
     list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
        if (node->op == gpir_op_dummy_m) {
@@ -1658,30 +1633,6 @@ static void schedule_build_dependency(gpir_block *block)
           gpir_node_delete(node);
        }
     }
-
-   memset(last_written, 0, sizeof(last_written));
-
-   /* False dependencies. For value registers, these exist only to make sure
-    * that the maximum pressure isn't exceeded and are hence "fake".
-    */
-   list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
-      if (node->op == gpir_op_load_reg) {
-         gpir_load_node *load = gpir_node_to_load(node);
-         unsigned index = 4 * load->index + load->component;
-         if (last_written[index]) {
-            gpir_node_add_dep(last_written[index], node, GPIR_DEP_WRITE_AFTER_READ);
-         }
-      } else if (node->op == gpir_op_store_reg) {
-         gpir_store_node *store = gpir_node_to_store(node);
-         unsigned index = 4 * store->index + store->component;
-         last_written[index] = node;
-      } else {
-         add_fake_dep(node, node, last_written);
-      }
-
-      if (node->value_reg >= 0)
-         last_written[node->value_reg] = node;
-   }
  }
  
  static void print_statistic(gpir_compiler *comp, int save_index)
author	Connor Abbott <cwabbott0@gmail.com>
	Wed, 9 Oct 2019 13:36:56 +0000 (09:36 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Wed, 1 Sep 2021 08:30:57 +0000 (08:30 +0000)
src/gallium/drivers/lima/ci/deqp-lima-fails.txt		patch \| blob \| history
src/gallium/drivers/lima/ir/gp/regalloc.c		patch \| blob \| history
src/gallium/drivers/lima/ir/gp/scheduler.c		patch \| blob \| history