ir3: Convert to register intrinsics

author Alyssa Rosenzweig <alyssa@rosenzweig.io>

Wed, 12 Jul 2023 18:21:43 +0000 (14:21 -0400)

committer Alyssa Rosenzweig <alyssa@rosenzweig.io>

Fri, 14 Jul 2023 13:28:48 +0000 (09:28 -0400)
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Wed, 12 Jul 2023 18:21:43 +0000 (14:21 -0400)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Fri, 14 Jul 2023 13:28:48 +0000 (09:28 -0400)
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h

index 0f7096a..9cf28f6 100644 (file)
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -580,7 +580,7 @@ struct ir3_array {
     unsigned length;
     unsigned id;
  
-   struct nir_register *r;
+   struct nir_ssa_def *r;
  
     /* To avoid array write's from getting DCE'd, keep track of the
      * most recent write.  Any array access depends on the most
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c

index 4305682..f810ad9 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1992,6 +1992,63 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
     const unsigned primitive_map = const_state->offsets.primitive_map * 4;
  
     switch (intr->intrinsic) {
+   case nir_intrinsic_decl_reg:
+      /* There's logically nothing to do, but this has a destination in NIR so
+       * plug in something... It will get DCE'd.
+       */
+      dst[0] = create_immed(ctx->block, 0);
+      break;
+
+   case nir_intrinsic_load_reg:
+   case nir_intrinsic_load_reg_indirect: {
+      struct ir3_array *arr = ir3_get_array(ctx, intr->src[0].ssa);
+      struct ir3_instruction *addr = NULL;
+
+      if (intr->intrinsic == nir_intrinsic_load_reg_indirect) {
+         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, &intr->src[1])[0],
+                              dest_components);
+      }
+
+      ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[0].ssa);
+      assert(dest_components == nir_intrinsic_num_components(decl));
+
+      for (unsigned i = 0; i < dest_components; i++) {
+         unsigned n = nir_intrinsic_base(intr) * dest_components + i;
+         compile_assert(ctx, n < arr->length);
+         dst[i] = ir3_create_array_load(ctx, arr, n, addr);
+      }
+
+      break;
+   }
+
+   case nir_intrinsic_store_reg:
+   case nir_intrinsic_store_reg_indirect: {
+      struct ir3_array *arr = ir3_get_array(ctx, intr->src[1].ssa);
+      unsigned num_components = nir_src_num_components(intr->src[0]);
+      struct ir3_instruction *addr = NULL;
+
+      ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[1].ssa);
+      assert(num_components == nir_intrinsic_num_components(decl));
+
+      struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[0]);
+
+      if (intr->intrinsic == nir_intrinsic_store_reg_indirect) {
+         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, &intr->src[2])[0],
+                              num_components);
+      }
+
+      u_foreach_bit(i, nir_intrinsic_write_mask(intr)) {
+         assert(i < num_components);
+
+         unsigned n = nir_intrinsic_base(intr) * num_components + i;
+         compile_assert(ctx, n < arr->length);
+         if (value[i])
+            ir3_create_array_store(ctx, arr, n, value[i], addr);
+      }
+
+      break;
+   }
+
     case nir_intrinsic_load_uniform:
        idx = nir_intrinsic_base(intr);
        if (nir_src_is_const(intr->src[0])) {
@@ -4377,8 +4434,8 @@ emit_instructions(struct ir3_context *ctx)
     ctx->so->shared_size = ctx->s->info.shared_size;
  
     /* NOTE: need to do something more clever when we support >1 fxn */
-   nir_foreach_register (reg, &fxn->registers) {
-      ir3_declare_array(ctx, reg);
+   nir_foreach_reg_decl (decl, fxn) {
+      ir3_declare_array(ctx, decl);
     }
  
     if (ctx->so->type == MESA_SHADER_TESS_CTRL &&
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c

index 6f8f738..3e995a1 100644 (file)
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -29,6 +29,8 @@
  #include "ir3_image.h"
  #include "ir3_nir.h"
  #include "ir3_shader.h"
+#include "nir.h"
+#include "nir_intrinsics_indices.h"
  
  struct ir3_context *
  ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
@@ -87,9 +89,9 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
      */
     bool progress = false;
     bool needs_late_alg = false;
-   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
+   NIR_PASS(progress, ctx->s, nir_lower_locals_to_reg_intrinsics, 1);
  
-   /* we could need cleanup after lower_locals_to_regs */
+   /* we could need cleanup after lower_locals_to_reg_intrinsics */
     while (progress) {
        progress = false;
        NIR_PASS(progress, ctx->s, nir_opt_algebraic);
@@ -98,9 +100,9 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
     }
  
     /* We want to lower nir_op_imul as late as possible, to catch also
-    * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
-    * However, we want a final swing of a few passes to have a chance
-    * at optimizing the result.
+    * those generated by earlier passes (e.g,
+    * nir_lower_locals_to_reg_intrinsics).  However, we want a final swing of a
+    * few passes to have a chance at optimizing the result.
      */
     progress = false;
     NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
@@ -201,17 +203,9 @@ ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
  struct ir3_instruction **
  ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
  {
-   struct ir3_instruction **value;
+   assert(dst->is_ssa);
+   struct ir3_instruction **value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
  
-   if (dst->is_ssa) {
-      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
-   } else {
-      value = ralloc_array(ctx, struct ir3_instruction *, n);
-   }
-
-   /* NOTE: in non-ssa case, we don't really need to store last_dst
-    * but this helps us catch cases where put_dst() call is forgotten
-    */
     compile_assert(ctx, !ctx->last_dst);
     ctx->last_dst = value;
     ctx->last_dst_n = n;
@@ -222,31 +216,11 @@ ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
  struct ir3_instruction *const *
  ir3_get_src(struct ir3_context *ctx, nir_src *src)
  {
-   if (src->is_ssa) {
-      struct hash_entry *entry;
-      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
-      compile_assert(ctx, entry);
-      return entry->data;
-   } else {
-      nir_register *reg = src->reg.reg;
-      struct ir3_array *arr = ir3_get_array(ctx, reg);
-      unsigned num_components = arr->r->num_components;
-      struct ir3_instruction *addr = NULL;
-      struct ir3_instruction **value =
-         ralloc_array(ctx, struct ir3_instruction *, num_components);
-
-      if (src->reg.indirect)
-         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
-                              reg->num_components);
-
-      for (unsigned i = 0; i < num_components; i++) {
-         unsigned n = src->reg.base_offset * reg->num_components + i;
-         compile_assert(ctx, n < arr->length);
-         value[i] = ir3_create_array_load(ctx, arr, n, addr);
-      }
-
-      return value;
-   }
+   assert(src->is_ssa);
+   struct hash_entry *entry;
+   entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
+   compile_assert(ctx, entry);
+   return entry->data;
  }
  
  void
@@ -279,27 +253,7 @@ ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
        }
     }
  
-   if (!dst->is_ssa) {
-      nir_register *reg = dst->reg.reg;
-      struct ir3_array *arr = ir3_get_array(ctx, reg);
-      unsigned num_components = ctx->last_dst_n;
-      struct ir3_instruction *addr = NULL;
-
-      if (dst->reg.indirect)
-         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
-                              reg->num_components);
-
-      for (unsigned i = 0; i < num_components; i++) {
-         unsigned n = dst->reg.base_offset * reg->num_components + i;
-         compile_assert(ctx, n < arr->length);
-         if (!ctx->last_dst[i])
-            continue;
-         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
-      }
-
-      ralloc_free(ctx->last_dst);
-   }
-
+   assert(dst->is_ssa);
     ctx->last_dst = NULL;
     ctx->last_dst_n = 0;
  }
@@ -543,7 +497,7 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
   */
  
  void
-ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
+ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl)
  {
     struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
     arr->id = ++ctx->num_arrays;
@@ -554,15 +508,17 @@ ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
      * It would be nice if there was a nir pass to convert arrays of
      * length 1 to ssa.
      */
-   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
+   arr->length = nir_intrinsic_num_components(decl) *
+                 MAX2(1, nir_intrinsic_num_array_elems(decl));
+
     compile_assert(ctx, arr->length > 0);
-   arr->r = reg;
-   arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16;
+   arr->r = &decl->dest.ssa;
+   arr->half = ir3_bitsize(ctx, nir_intrinsic_bit_size(decl)) <= 16;
     list_addtail(&arr->node, &ctx->ir->array_list);
  }
  
  struct ir3_array *
-ir3_get_array(struct ir3_context *ctx, nir_register *reg)
+ir3_get_array(struct ir3_context *ctx, nir_ssa_def *reg)
  {
     foreach_array (arr, &ctx->ir->array_list) {
        if (arr->r == reg)
@@ -622,34 +578,6 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
     struct ir3_register *dst;
     unsigned flags = 0;
  
-   /* if not relative store, don't create an extra mov, since that
-    * ends up being difficult for cp to remove.
-    *
-    * Also, don't skip the mov if the src is meta (like fanout/split),
-    * since that creates a situation that RA can't really handle properly.
-    */
-   if (!address && !is_meta(src)) {
-      dst = src->dsts[0];
-
-      src->barrier_class |= IR3_BARRIER_ARRAY_W;
-      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
-
-      dst->flags |= IR3_REG_ARRAY;
-      dst->size = arr->length;
-      dst->array.id = arr->id;
-      dst->array.offset = n;
-      dst->array.base = INVALID_REG;
-
-      if (arr->last_write && arr->last_write->instr->block == src->block)
-         ir3_reg_set_last_array(src, dst, arr->last_write);
-
-      arr->last_write = dst;
-
-      array_insert(block, block->keeps, src);
-
-      return;
-   }
-
     mov = ir3_instr_create(block, OPC_MOV, 1, 1);
     if (arr->half) {
        mov->cat1.src_type = TYPE_U16;
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h

index 22c7b1b..ecd8e9b 100644 (file)
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -243,8 +243,8 @@ struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,
  struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,
                                            struct ir3_instruction *src);
  
-void ir3_declare_array(struct ir3_context *ctx, nir_register *reg);
-struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_register *reg);
+void ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl);
+struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_ssa_def *reg);
  struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
                                                struct ir3_array *arr, int n,
                                                struct ir3_instruction *address);
author	Alyssa Rosenzweig <alyssa@rosenzweig.io>
	Wed, 12 Jul 2023 18:21:43 +0000 (14:21 -0400)
committer	Alyssa Rosenzweig <alyssa@rosenzweig.io>
	Fri, 14 Jul 2023 13:28:48 +0000 (09:28 -0400)
src/freedreno/ir3/ir3.h		patch | blob | history
src/freedreno/ir3/ir3_compiler_nir.c		patch | blob | history
src/freedreno/ir3/ir3_context.c		patch | blob | history
src/freedreno/ir3/ir3_context.h		patch | blob | history