From 69f8daca16607a891842d4517542719759ba22dd Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Mon, 29 May 2023 13:05:56 -0400
Subject: [PATCH] pan/mdg: Ingest new-style registers

Switch to register intrinsics, using the helpers. Since our backend copyprop
chokes on non-SSA moves, we get better coalescing with this approach, hence the
small improvements to instruction count / cycle count in shader-db. Changes to
register pressure seem to be noise from iteration order. I'm not too worried.

   total instructions in shared programs: 1508444 -> 1508193 (-0.02%)
   instructions in affected programs: 42581 -> 42330 (-0.59%)
   helped: 482
   HURT: 41
   Inconclusive result (value mean confidence interval includes 0).

   total bundles in shared programs: 643023 -> 643136 (0.02%)
   bundles in affected programs: 16318 -> 16431 (0.69%)
   helped: 230
   HURT: 85
   Inconclusive result (value mean confidence interval includes 0).

   total quadwords in shared programs: 1125992 -> 1125600 (-0.03%)
   quadwords in affected programs: 125366 -> 124974 (-0.31%)
   helped: 507
   HURT: 351
   Quadwords are helped.

   total registers in shared programs: 90632 -> 90554 (-0.09%)
   registers in affected programs: 669 -> 591 (-11.66%)
   helped: 114
   HURT: 31
   Registers are helped.

   total threads in shared programs: 55607 -> 55600 (-0.01%)
   threads in affected programs: 20 -> 13 (-35.00%)
   helped: 1
   HURT: 7
   Inconclusive result (value mean confidence interval includes 0).

   total spills in shared programs: 1371 -> 1437 (4.81%)
   spills in affected programs: 44 -> 110 (150.00%)
   helped: 0
   HURT: 2

   total fills in shared programs: 5133 -> 5273 (2.73%)
   fills in affected programs: 84 -> 224 (166.67%)
   helped: 0
   HURT: 2

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23089>
---
 src/panfrost/midgard/compiler.h            | 40 ++++++++++++++++++--------
 src/panfrost/midgard/midgard_compile.c     | 45 ++++++++++++++++++++----------
 src/panfrost/midgard/midgard_derivatives.c |  9 +-----
 3 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index 2272933..1a362bd 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -456,7 +456,7 @@ make_compiler_temp(compiler_context *ctx)
 static inline unsigned
 make_compiler_temp_reg(compiler_context *ctx)
 {
-   return ((ctx->func->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG;
+   return ((ctx->func->impl->ssa_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG;
 }
 
 static bool
@@ -472,27 +472,43 @@ nir_ssa_index(nir_ssa_def *ssa)
 }
 
 static inline unsigned
+nir_reg_index(nir_ssa_def *handle)
+{
+   return (handle->index << 1) | PAN_IS_REG;
+}
+
+static inline unsigned
 nir_src_index(compiler_context *ctx, nir_src *src)
 {
-   if (src->is_ssa)
+   nir_intrinsic_instr *load = nir_load_reg_for_def(src->ssa);
+
+   if (load)
+      return nir_reg_index(load->src[0].ssa);
+   else
       return nir_ssa_index(src->ssa);
-   else {
-      assert(!src->reg.indirect);
-      return (src->reg.reg->index << 1) | PAN_IS_REG;
-   }
 }
 
 static inline unsigned
-nir_dest_index(nir_dest *dst)
+nir_dest_index_with_mask(nir_dest *dest, uint16_t *write_mask)
 {
-   if (dst->is_ssa)
-      return (dst->ssa.index << 1) | 0;
-   else {
-      assert(!dst->reg.indirect);
-      return (dst->reg.reg->index << 1) | PAN_IS_REG;
+   nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa);
+
+   if (store) {
+      *write_mask = nir_intrinsic_write_mask(store);
+      return nir_reg_index(store->src[1].ssa);
+   } else {
+      *write_mask = (uint16_t)BITFIELD_MASK(nir_dest_num_components(*dest));
+      return nir_ssa_index(&dest->ssa);
    }
 }
 
+static inline unsigned
+nir_dest_index(nir_dest *dest)
+{
+   uint16_t write_mask = 0;
+   return nir_dest_index_with_mask(dest, &write_mask);
+}
+
 /* MIR manipulation */
 
 void mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new);
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 13bfb75..9473b60 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -480,13 +480,14 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
    NIR_PASS_V(nir, nir_opt_move, move_all);
 
    /* Take us out of SSA */
-   NIR_PASS(progress, nir, nir_convert_from_ssa, true, false);
+   NIR_PASS(progress, nir, nir_convert_from_ssa, true, true);
 
    /* We are a vector architecture; write combine where possible */
    NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
-   NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL);
+   NIR_PASS(progress, nir, nir_lower_vec_to_regs, NULL, NULL);
 
    NIR_PASS(progress, nir, nir_opt_dce);
+   NIR_PASS_V(nir, nir_trivialize_registers);
 }
 
 /* Do not actually emit a load; instead, cache the constant for inlining */
@@ -634,8 +635,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
       return;
    }
 
-   bool is_ssa = dest->is_ssa;
-
    unsigned nr_components = nir_dest_num_components(*dest);
    unsigned nr_inputs = nir_op_infos[instr->op].num_inputs;
    unsigned op = 0;
@@ -875,12 +874,13 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 
    midgard_instruction ins = {
       .type = TAG_ALU_4,
-      .dest = nir_dest_index(dest),
       .dest_type =
          nir_op_infos[instr->op].output_type | nir_dest_bit_size(*dest),
       .roundmode = roundmode,
    };
 
+   ins.dest = nir_dest_index_with_mask(dest, &ins.mask);
+
    for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i)
       ins.src[i] = ~0;
 
@@ -920,15 +920,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
          ins.src_abs[1] = true;
    }
 
-   ins.mask = mask_of(nr_components);
-
-   /* Apply writemask if non-SSA, keeping in mind that we can't write to
-    * components that don't exist. Note modifier => SSA => !reg => no
-    * writemask, so we don't have to worry about writemasks here.*/
-
-   if (!is_ssa)
-      ins.mask &= instr->dest.write_mask;
-
    ins.op = op;
    ins.outmod = outmod;
 
@@ -1550,6 +1541,32 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
    unsigned offset = 0, reg;
 
    switch (instr->intrinsic) {
+   case nir_intrinsic_decl_reg:
+   case nir_intrinsic_store_reg:
+      /* Always fully consumed */
+      break;
+
+   case nir_intrinsic_load_reg: {
+      /* NIR guarantees that, for typical isel, this will always be fully
+       * consumed. However, we also do our own nir_ssa_scalar chasing for
+       * address arithmetic, bypassing the source chasing helpers. So we can end
+       * up with unconsumed load_register instructions. Translate them here. 99%
+       * of the time, these moves will be DCE'd away.
+       */
+      assert(instr->src[0].is_ssa);
+      nir_ssa_def *handle = instr->src[0].ssa;
+
+      midgard_instruction ins =
+         v_mov(nir_reg_index(handle), nir_dest_index(&instr->dest));
+
+      ins.dest_type = ins.src_types[1] =
+         nir_type_uint | nir_dest_bit_size(instr->dest);
+
+      ins.mask = BITFIELD_MASK(nir_dest_num_components(instr->dest));
+      emit_mir_instruction(ctx, ins);
+      break;
+   }
+
    case nir_intrinsic_discard_if:
    case nir_intrinsic_discard: {
       bool conditional = instr->intrinsic == nir_intrinsic_discard_if;
diff --git a/src/panfrost/midgard/midgard_derivatives.c b/src/panfrost/midgard/midgard_derivatives.c
index d243a00..56ae01a 100644
--- a/src/panfrost/midgard/midgard_derivatives.c
+++ b/src/panfrost/midgard/midgard_derivatives.c
@@ -95,13 +95,8 @@ void
 midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
 {
    /* Create texture instructions */
-
-   unsigned nr_components = nir_dest_num_components(instr->dest.dest);
-
    midgard_instruction ins = {
       .type = TAG_TEXTURE_4,
-      .mask = mask_of(nr_components),
-      .dest = nir_dest_index(&instr->dest.dest),
       .dest_type = nir_type_float32,
       .src =
          {
@@ -127,9 +122,7 @@ midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
          },
    };
 
-   if (!instr->dest.dest.is_ssa)
-      ins.mask &= instr->dest.write_mask;
-
+   ins.dest = nir_dest_index_with_mask(&instr->dest.dest, &ins.mask);
    emit_mir_instruction(ctx, ins);
 }
 
-- 
2.7.4