broadcom/compiler: prefer reconstruction over TMU spills when possible
authorIago Toral Quiroga <itoral@igalia.com>
Fri, 1 Apr 2022 08:51:50 +0000 (10:51 +0200)
committerMarge Bot <emma+marge@anholt.net>
Fri, 8 Apr 2022 05:37:28 +0000 (05:37 +0000)
We have been reconstructing/rematerializing uniforms for a while, but we
can do this in more scenarios, namely for instructions whose result is
immutable along the execution of a shader across all channels.

By doing this we gain the capacity to eliminate TMU spills, which are
not only slower, but can also make us drop to a fallback compilation
strategy.

Shader-db results show a small increase in instruction counts caused
by us now being able to choose preferential compiler strategies that
are intended to reduce TMU latency. In some cases, we are now also
able to avoid dropping thread counts:

total instructions in shared programs: 12658092 -> 12659245 (<.01%)
instructions in affected programs: 75812 -> 76965 (1.52%)
helped: 55
HURT: 107

total threads in shared programs: 416286 -> 416412 (0.03%)
threads in affected programs: 126 -> 252 (100.00%)
helped: 63
HURT: 0

total uniforms in shared programs: 3716916 -> 3716396 (-0.01%)
uniforms in affected programs: 19327 -> 18807 (-2.69%)
helped: 94
HURT: 50

total max-temps in shared programs: 2161796 -> 2161578 (-0.01%)
max-temps in affected programs: 3961 -> 3743 (-5.50%)
helped: 80
HURT: 24

total spills in shared programs: 3274 -> 3266 (-0.24%)
spills in affected programs: 98 -> 90 (-8.16%)
helped: 6
HURT: 0

total fills in shared programs: 4657 -> 4642 (-0.32%)
fills in affected programs: 130 -> 115 (-11.54%)
helped: 6
HURT: 0

Reviewed-by: Alejandro PiƱeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15710>

src/broadcom/compiler/vir_register_allocate.c

index f7f177bdd627c3543537c64b1d247895e8ed557d..0fd52a988d3795e761bd3522c22f275c217a6089 100644 (file)
@@ -132,6 +132,97 @@ vir_is_mov_uniform(struct v3d_compile *c, int temp)
         return def && def->qpu.sig.ldunif;
 }
 
+/* Returns true if the result of 'inst' can be recomputed at any point in
+ * the shader instead of being spilled to memory and filled back. Only the
+ * side-effect free ALU add opcodes listed below qualify: they read no
+ * sources and their result is invariant for the duration of the shader.
+ * We additionally reject uses of conditions, flag updates and output
+ * packing, since the reconstruction helpers emit the plain form of the
+ * instruction (see reconstruct_temp).
+ */
+static bool
+can_reconstruct_inst(struct qinst *inst)
+{
+        assert(inst);
+
+        if (vir_is_add(inst)) {
+                switch (inst->qpu.alu.add.op) {
+                case V3D_QPU_A_FXCD:
+                case V3D_QPU_A_FYCD:
+                case V3D_QPU_A_XCD:
+                case V3D_QPU_A_YCD:
+                case V3D_QPU_A_IID:
+                case V3D_QPU_A_EIDX:
+                case V3D_QPU_A_TIDX:
+                case V3D_QPU_A_SAMPID:
+                        /* No need to check input unpacks because none of these
+                         * opcodes read sources. FXCD,FYCD have pack variants.
+                         */
+                        return inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
+                               inst->qpu.flags.auf == V3D_QPU_UF_NONE &&
+                               inst->qpu.flags.apf == V3D_QPU_PF_NONE &&
+                               inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE;
+                default:
+                        return false;
+                }
+        }
+
+        /* Only ALU add instructions are candidates for reconstruction. */
+        return false;
+}
+
+/* Returns true if 'temp' has a defining instruction whose result can be
+ * reconstructed (see can_reconstruct_inst). A NULL entry in c->defs
+ * presumably means there is no single known definition for the temp —
+ * TODO confirm — in which case we cannot reconstruct it.
+ */
+static bool
+can_reconstruct_temp(struct v3d_compile *c, int temp)
+{
+        struct qinst *def = c->defs[temp];
+        return def && can_reconstruct_inst(def);
+}
+
+/* Emits a fresh instance of the reconstructable opcode 'op' and returns
+ * the new temp holding its result. The instruction is emitted at the
+ * current cursor position; callers are expected to position the cursor
+ * right before the use being rewritten (see the fill-replacement path in
+ * v3d_spill_reg). 'op' must be one of the opcodes accepted by
+ * can_reconstruct_inst.
+ */
+static struct qreg
+reconstruct_temp(struct v3d_compile *c, enum v3d_qpu_add_op op)
+{
+        struct qreg dest;
+        switch (op) {
+        case V3D_QPU_A_FXCD:
+                dest = vir_FXCD(c);
+                break;
+        case V3D_QPU_A_FYCD:
+                dest = vir_FYCD(c);
+                break;
+        case V3D_QPU_A_XCD:
+                dest = vir_XCD(c);
+                break;
+        case V3D_QPU_A_YCD:
+                dest = vir_YCD(c);
+                break;
+        case V3D_QPU_A_IID:
+                dest = vir_IID(c);
+                break;
+        case V3D_QPU_A_EIDX:
+                dest = vir_EIDX(c);
+                break;
+        case V3D_QPU_A_TIDX:
+                dest = vir_TIDX(c);
+                break;
+        case V3D_QPU_A_SAMPID:
+                dest = vir_SAMPID(c);
+                break;
+        default:
+            unreachable("Unexpected opcode for reconstruction");
+        }
+
+        return dest;
+}
+
+/* Strategy used to restore the value of a spilled temp at its uses,
+ * ordered from cheapest to most expensive.
+ */
+enum temp_spill_type {
+        SPILL_TYPE_UNIFORM,     /* Rematerialize with a uniform load. */
+        SPILL_TYPE_RECONSTRUCT, /* Recompute via a reconstructable ALU op. */
+        SPILL_TYPE_TMU          /* Real spill/fill through the TMU. */
+};
+
+/* Picks the cheapest spill strategy available for 'temp': uniform
+ * rematerialization if it is a plain uniform load, instruction
+ * reconstruction if its definition qualifies, and otherwise a real TMU
+ * spill as the fallback.
+ */
+static enum temp_spill_type
+get_spill_type_for_temp(struct v3d_compile *c, int temp)
+{
+   if (vir_is_mov_uniform(c, temp))
+      return SPILL_TYPE_UNIFORM;
+
+   if (can_reconstruct_temp(c, temp))
+      return SPILL_TYPE_RECONSTRUCT;
+
+   return SPILL_TYPE_TMU;
+}
+
 static int
 v3d_choose_spill_node(struct v3d_compile *c)
 {
@@ -160,7 +251,10 @@ v3d_choose_spill_node(struct v3d_compile *c)
                                         continue;
 
                                 int temp = inst->src[i].index;
-                                if (vir_is_mov_uniform(c, temp)) {
+                                enum temp_spill_type spill_type =
+                                        get_spill_type_for_temp(c, temp);
+
+                                if (spill_type != SPILL_TYPE_TMU) {
                                         spill_costs[temp] += block_scale;
                                 } else if (!no_spilling) {
                                         float tmu_op_scale = in_tmu_operation ?
@@ -175,11 +269,11 @@ v3d_choose_spill_node(struct v3d_compile *c)
 
                         if (inst->dst.file == QFILE_TEMP) {
                                 int temp = inst->dst.index;
+                                enum temp_spill_type spill_type =
+                                        get_spill_type_for_temp(c, temp);
 
-                                if (vir_is_mov_uniform(c, temp)) {
-                                        /* We just rematerialize the unform
-                                         * later.
-                                         */
+                                if (spill_type != SPILL_TYPE_TMU) {
+                                        /* We just rematerialize it later */
                                 } else if (!no_spilling) {
                                         spill_costs[temp] += (block_scale *
                                                               tmu_scale);
@@ -443,11 +537,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
         c->spill_start_num_temps = c->num_temps;
         c->spilling = true;
 
-        bool is_uniform = vir_is_mov_uniform(c, spill_temp);
+        enum temp_spill_type spill_type = get_spill_type_for_temp(c, spill_temp);
 
         uint32_t spill_offset = 0;
-
-        if (!is_uniform) {
+        if (spill_type == SPILL_TYPE_TMU) {
                 spill_offset = c->spill_size;
                 c->spill_size += V3D_CHANNELS * sizeof(uint32_t);
 
@@ -459,11 +552,18 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
         assert(last_thrsw && last_thrsw->is_last_thrsw);
 
         int uniform_index = ~0;
-        if (is_uniform) {
+        if (spill_type == SPILL_TYPE_UNIFORM) {
                 struct qinst *orig_unif = c->defs[spill_temp];
                 uniform_index = orig_unif->uniform;
         }
 
+        enum v3d_qpu_add_op reconstruct_op = V3D_QPU_A_NOP;
+        if (spill_type == SPILL_TYPE_RECONSTRUCT) {
+                struct qinst *orig_def = c->defs[spill_temp];
+                assert(vir_is_add(orig_def));
+                reconstruct_op = orig_def->qpu.alu.add.op;
+        }
+
         uint32_t spill_node = temp_to_node(spill_temp);
 
         /* We must disable the ldunif optimization if we are spilling uniforms */
@@ -515,7 +615,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
 
                                 c->cursor = vir_before_inst(inst);
 
-                                if (is_uniform) {
+                                if (spill_type == SPILL_TYPE_UNIFORM) {
                                         struct qreg unif =
                                                 vir_uniform(c,
                                                             c->uniform_contents[uniform_index],
@@ -526,6 +626,16 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
                                          * we can use any register class for it.
                                          */
                                         add_node(c, unif.index, CLASS_BITS_ANY);
+                                } else if (spill_type == SPILL_TYPE_RECONSTRUCT) {
+                                        struct qreg temp =
+                                                reconstruct_temp(c, reconstruct_op);
+                                        inst->src[i] = temp;
+                                        /* We are using the temp in the
+                                         * instruction immediately after so we
+                                         * can use ACC.
+                                         */
+                                        add_node(c, temp.index, CLASS_BITS_PHYS |
+                                                                CLASS_BITS_ACC);
                                 } else {
                                         /* If we have a postponed spill, we
                                          * don't need a fill as the temp would
@@ -555,7 +665,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
                         /* spills */
                         if (inst->dst.file == QFILE_TEMP &&
                             inst->dst.index == spill_temp) {
-                                if (is_uniform) {
+                                if (spill_type != SPILL_TYPE_TMU) {
                                         c->cursor.link = NULL;
                                         vir_remove_instruction(c, inst);
                                 } else {
@@ -630,7 +740,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
                         }
                 }
 
-                if (!is_uniform) {
+                if (spill_type == SPILL_TYPE_TMU) {
                         if (i != sb_temp &&
                             interferes(c->temp_start[i], c->temp_end[i],
                                        c->temp_start[sb_temp], c->temp_end[sb_temp])) {
@@ -1060,9 +1170,9 @@ v3d_register_allocate(struct v3d_compile *c)
                         goto spill_fail;
 
                 uint32_t temp = node_to_temp(node);
-
-                bool is_uniform = vir_is_mov_uniform(c, temp);
-                if (is_uniform || tmu_spilling_allowed(c)) {
+                enum temp_spill_type spill_type =
+                        get_spill_type_for_temp(c, temp);
+                if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
                         v3d_spill_reg(c, acc_nodes, temp);
                         if (c->spills + c->fills > c->max_tmu_spills)
                                 goto spill_fail;