broadcom/compiler: only handle accumulator classes if present

author Iago Toral Quiroga <itoral@igalia.com>

Wed, 29 Sep 2021 10:10:31 +0000 (12:10 +0200)

committer Marge Bot <emma+marge@anholt.net>

Fri, 13 Oct 2023 22:37:41 +0000 (22:37 +0000)
author Iago Toral Quiroga <itoral@igalia.com>
Wed, 29 Sep 2021 10:10:31 +0000 (12:10 +0200)
committer Marge Bot <emma+marge@anholt.net>
Fri, 13 Oct 2023 22:37:41 +0000 (22:37 +0000)
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c

index f2df35c..e78ccb7 100644 (file)
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -53,6 +53,17 @@ get_class_bit_any(const struct v3d_device_info *devinfo)
          else
                  return CLASS_BITS_PHYS;
  }
+
+static uint8_t
+filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
+{
+   if (!devinfo->has_accumulators) {
+      assert(class_bits & CLASS_BITS_PHYS);
+      class_bits = CLASS_BITS_PHYS;
+   }
+   return class_bits;
+}
+
  static inline uint32_t
  temp_to_node(struct v3d_compile *c, uint32_t temp)
  {
@@ -413,8 +424,10 @@ v3d_setup_spill_base(struct v3d_compile *c)
                   */
                  if (c->spilling) {
                          int temp_class = CLASS_BITS_PHYS;
-                        if (i != c->spill_base.index)
+                        if (c->devinfo->has_accumulators &&
+                            i != c->spill_base.index) {
                                  temp_class |= CLASS_BITS_ACC;
+                        }
                          add_node(c, i, temp_class);
                  }
          }
@@ -473,14 +486,16 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
           * temp will be used immediately so just like the uniform above we
           * can allow accumulators.
           */
+        int temp_class =
+                filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC);
          if (!fill_dst) {
                  struct qreg dst = vir_TMUWT(c);
                  assert(dst.file == QFILE_TEMP);
-                add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
+                add_node(c, dst.index, temp_class);
          } else {
                  *fill_dst = vir_LDTMU(c);
                  assert(fill_dst->file == QFILE_TEMP);
-                add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
+                add_node(c, fill_dst->index, temp_class);
          }
  
          /* Temps across the thread switch we injected can't be assigned to
@@ -662,8 +677,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
                                           * instruction immediately after so we
                                           * can use ACC.
                                           */
-                                        add_node(c, temp.index, CLASS_BITS_PHYS |
-                                                                CLASS_BITS_ACC);
+                                        int temp_class =
+                                                filter_class_bits(c->devinfo, CLASS_BITS_PHYS |
+                                                                              CLASS_BITS_ACC);
+                                        add_node(c, temp.index, temp_class);
                                  } else {
                                          /* If we have a postponed spill, we
                                           * don't need a fill as the temp would
@@ -941,6 +958,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
                  compiler->reg_class_phys[threads] =
                          ra_alloc_contig_reg_class(compiler->regs, 1);
  
+                /* Init physical regs */
                  for (int i = phys_index;
                       i < phys_index + (PHYS_COUNT >> threads); i++) {
                          if (compiler->devinfo->has_accumulators)
@@ -949,16 +967,15 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
                          ra_class_add_reg(compiler->reg_class_any[threads], i);
                  }
  
+                /* Init accumulator regs */
                  if (compiler->devinfo->has_accumulators) {
                          for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
                                  ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
                                  ra_class_add_reg(compiler->reg_class_any[threads], i);
                          }
-                }
-                /* r5 can only store a single 32-bit value, so not much can
-                 * use it.
-                 */
-                if (compiler->devinfo->has_accumulators) {
+                        /* r5 can only store a single 32-bit value, so not much can
+                         * use it.
+                         */
                          ra_class_add_reg(compiler->reg_class_r5[threads],
                                           ACC_INDEX + 5);
                          ra_class_add_reg(compiler->reg_class_any[threads],
@@ -1081,21 +1098,23 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
                   * because ldunif has usually a shorter lifespan, allowing for
                   * more accumulator reuse and QPU merges.
                   */
-                if (!inst->qpu.sig.ldunif) {
-                        uint8_t class_bits =
-                                get_temp_class_bits(c, inst->dst.index) &
-                                ~CLASS_BITS_R5;
-                        set_temp_class_bits(c, inst->dst.index,
-                                            class_bits);
-
-                } else {
-                        /* Until V3D 4.x, we could only load a uniform
-                         * to r5, so we'll need to spill if uniform
-                         * loads interfere with each other.
-                         */
-                        if (c->devinfo->ver < 40) {
+                if (c->devinfo->has_accumulators) {
+                        if (!inst->qpu.sig.ldunif) {
+                                uint8_t class_bits =
+                                        get_temp_class_bits(c, inst->dst.index) &
+                                        ~CLASS_BITS_R5;
                                  set_temp_class_bits(c, inst->dst.index,
-                                                    CLASS_BITS_R5);
+                                                    class_bits);
+
+                        } else {
+                                /* Until V3D 4.x, we could only load a uniform
+                                 * to r5, so we'll need to spill if uniform
+                                 * loads interfere with each other.
+                                 */
+                                if (c->devinfo->ver < 40) {
+                                        set_temp_class_bits(c, inst->dst.index,
+                                                            CLASS_BITS_R5);
+                                }
                          }
                  }
          }
@@ -1152,8 +1171,10 @@ v3d_register_allocate(struct v3d_compile *c)
                          c->thread_index--;
          }
  
-        c->g = ra_alloc_interference_graph(c->compiler->regs,
-                                           c->num_temps + ARRAY_SIZE(acc_nodes));
+        unsigned num_ra_nodes = c->num_temps;
+        if (c->devinfo->has_accumulators)
+                num_ra_nodes += ARRAY_SIZE(acc_nodes);
+        c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
          ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
  
          /* Make some fixed nodes for the accumulators, which we will need to
@@ -1162,8 +1183,8 @@ v3d_register_allocate(struct v3d_compile *c)
           * live in, but the classes take up a lot of memory to set up, so we
           * don't want to make too many.
           */
-        for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) {
-                if (i < ACC_COUNT) {
+        for (uint32_t i = 0; i < num_ra_nodes; i++) {
+                if (c->devinfo->has_accumulators && i < ACC_COUNT) {
                          acc_nodes[i] = i;
                          ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
                          c->nodes.info[i].priority = 0;
author	Iago Toral Quiroga <itoral@igalia.com>
	Wed, 29 Sep 2021 10:10:31 +0000 (12:10 +0200)
committer	Marge Bot <emma+marge@anholt.net>
	Fri, 13 Oct 2023 22:37:41 +0000 (22:37 +0000)