ir3: Initial support for private memory

author Connor Abbott <cwabbott0@gmail.com>

Thu, 29 Oct 2020 14:05:24 +0000 (15:05 +0100)

committer Connor Abbott <cwabbott0@gmail.com>

Thu, 19 Nov 2020 16:55:03 +0000 (17:55 +0100)
author Connor Abbott <cwabbott0@gmail.com>
Thu, 29 Oct 2020 14:05:24 +0000 (15:05 +0100)
committer Connor Abbott <cwabbott0@gmail.com>
Thu, 19 Nov 2020 16:55:03 +0000 (17:55 +0100)
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c

index 0f5a0c2..431c757 100644 (file)
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -729,6 +729,11 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                 src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
         }
  
+       if ((instr->opc == OPC_STP || instr->opc == OPC_LDP) &&
+               src2->iim_val * type_size(instr->cat6.type) > 32) {
+               info->multi_dword_ldp_stp = true;
+       }
+
         /* TODO we need a more comprehensive list about which instructions
          * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
          * indicate to use the src_off encoding even if offset is zero
@@ -938,6 +943,7 @@ void * ir3_assemble(struct ir3_shader_variant *v)
         info->max_reg       = -1;
         info->max_half_reg  = -1;
         info->max_const     = -1;
+       info->multi_dword_ldp_stp = false;
  
         uint32_t instr_count = 0;
         foreach_block (block, &shader->block_list) {
@@ -1464,6 +1470,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
                         if ((instr->opc == OPC_STL) && (n != 2))
                                 return false;
  
+                       if ((instr->opc == OPC_LDP) && (n == 0))
+                               return false;
+
+                       if ((instr->opc == OPC_STP) && (n != 2))
+                               return false;
+
                         if (instr->opc == OPC_STLW && n == 0)
                                 return false;
  
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h

index 262f2a2..c5021cc 100644 (file)
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -64,6 +64,7 @@ struct ir3_info {
         int8_t   max_reg;   /* highest GPR # used by shader */
         int8_t   max_half_reg;
         int16_t  max_const;
+       bool     multi_dword_ldp_stp;
  
         /* number of sync bits: */
         uint16_t ss, sy;
@@ -400,6 +401,8 @@ struct ir3_instruction {
                 IR3_BARRIER_BUFFER_W   = 1 << 6,
                 IR3_BARRIER_ARRAY_R    = 1 << 7,
                 IR3_BARRIER_ARRAY_W    = 1 << 8,
+               IR3_BARRIER_PRIVATE_R  = 1 << 9,
+               IR3_BARRIER_PRIVATE_W  = 1 << 10,
         } barrier_class, barrier_conflict;
  
         /* Entry in ir3_block's instruction list: */
@@ -1692,9 +1695,11 @@ INSTR2(LDLV)
  INSTR3(LDG)
  INSTR3(LDL)
  INSTR3(LDLW)
+INSTR3(LDP)
  INSTR3(STG)
  INSTR3(STL)
  INSTR3(STLW)
+INSTR3(STP)
  INSTR1(RESINFO)
  INSTR1(RESFMT)
  INSTR2(ATOMIC_ADD)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c

index 154666a..a05b1d9 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1052,6 +1052,57 @@ emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
         return atomic;
  }
  
+/* Emit an LDP instruction for a nir load_scratch intrinsic. src[] = { offset }. */
+static void
+emit_intrinsic_load_scratch(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+               struct ir3_instruction **dst)
+{
+       struct ir3_block *b = ctx->block;
+       struct ir3_instruction *ldp, *offset;
+
+       offset = ir3_get_src(ctx, &intr->src[0])[0];  /* only element 0 of the offset source is used */
+
+       ldp = ir3_LDP(b, offset, 0,
+                       create_immed(b, intr->num_components), 0,  /* immediate: number of components to load */
+                       create_immed(b, 0), 0);  /* immediate 0 -- NOTE(review): presumably a constant base offset; confirm */
+
+       ldp->cat6.type = utype_dst(intr->dest);  /* access type is derived from the NIR destination */
+       ldp->regs[0]->wrmask = MASK(intr->num_components);  /* destination write mask is sized by the component count */
+
+       ldp->barrier_class = IR3_BARRIER_PRIVATE_R;  /* classed as a private-memory read ... */
+       ldp->barrier_conflict = IR3_BARRIER_PRIVATE_W;  /* ... which conflicts with private-memory writes */
+
+       ir3_split_dest(b, dst, ldp, 0, intr->num_components);  /* fills dst[] from the LDP result -- presumably one entry per component; confirm */
+}
+
+/* Emit an STP instruction for a nir store_scratch intrinsic. src[] = { value, offset }. const_index[] = { write_mask } */
+static void
+emit_intrinsic_store_scratch(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+{
+       struct ir3_block *b = ctx->block;
+       struct ir3_instruction *stp, *offset;
+       struct ir3_instruction * const *value;
+       unsigned wrmask, ncomp;
+
+       value  = ir3_get_src(ctx, &intr->src[0]);
+       offset = ir3_get_src(ctx, &intr->src[1])[0];  /* only element 0 of the offset source is used */
+
+       wrmask = nir_intrinsic_write_mask(intr);
+       ncomp  = ffs(~wrmask) - 1;  /* zero-based index of the lowest clear bit == number of contiguous low set bits */
+
+       assert(wrmask == BITFIELD_MASK(intr->num_components));  /* the mask must be exactly the mask for num_components */
+
+       stp = ir3_STP(b, offset, 0,
+               ir3_create_collect(ctx, value, ncomp), 0,  /* presumably gathers the first ncomp value components into one source; confirm */
+               create_immed(b, ncomp), 0);  /* immediate: number of components to store */
+       stp->cat6.dst_offset = 0;
+       stp->cat6.type = utype_src(intr->src[0]);  /* access type is derived from the stored value's source */
+       stp->barrier_class = IR3_BARRIER_PRIVATE_W;  /* classed as a private-memory write ... */
+       stp->barrier_conflict = IR3_BARRIER_PRIVATE_R | IR3_BARRIER_PRIVATE_W;  /* ... which conflicts with private reads and writes */
+
+       array_insert(b, b->keeps, stp);  /* appended to the block's keeps array -- NOTE(review): presumably so the result-less store is not eliminated; confirm */
+}
+
  struct tex_src_info {
         /* For prefetch */
         unsigned tex_base, samp_base, tex_idx, samp_idx;
@@ -1714,6 +1765,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
         case nir_intrinsic_shared_atomic_comp_swap:
                 dst[0] = emit_intrinsic_atomic_shared(ctx, intr);
                 break;
+       case nir_intrinsic_load_scratch:
+               emit_intrinsic_load_scratch(ctx, intr, dst);
+               break;
+       case nir_intrinsic_store_scratch:
+               emit_intrinsic_store_scratch(ctx, intr);
+               break;
         case nir_intrinsic_image_load:
                 emit_intrinsic_load_image(ctx, intr, dst);
                 break;
@@ -3347,6 +3404,8 @@ emit_instructions(struct ir3_context *ctx)
         ctx->so->cull_mask = MASK(ctx->s->info.cull_distance_array_size) <<
                 ctx->s->info.clip_distance_array_size;
  
+       ctx->so->pvtmem_size = ctx->s->scratch_size;
+
         /* NOTE: need to do something more clever when we support >1 fxn */
         nir_foreach_register (reg, &fxn->registers) {
                 ir3_declare_array(ctx, reg);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c

index d6d891a..2faa802 100644 (file)
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -261,6 +261,7 @@ should_split_wrmask(const nir_instr *instr, const void *data)
         case nir_intrinsic_store_ssbo:
         case nir_intrinsic_store_shared:
         case nir_intrinsic_store_global:
+       case nir_intrinsic_store_scratch:
                 return true;
         default:
                 return false;
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c

index 4cbe5f5..da43034 100644 (file)
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -144,6 +144,11 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v)
         if (compiler->gpu_id >= 400)
                 v->constlen = align(v->constlen, 4);
  
+       /* Use the per-wave layout by default on a6xx. It should result in better
+        * performance when loads/stores are to a uniform index.
+        */
+       v->pvtmem_per_wave = compiler->gpu_id >= 600 && !v->info.multi_dword_ldp_stp;
+
         fixup_regfootprint(v);
  
         return bin;
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h

index bba3c62..6b5e2af 100644 (file)
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -554,6 +554,11 @@ struct ir3_shader_variant {
          */
         unsigned constlen;
  
+       /* The private memory size in bytes */
+       unsigned pvtmem_size;
+       /* Whether we should use the new per-wave layout rather than per-fiber. */
+       bool pvtmem_per_wave;
+
         /* About Linkage:
          *   + Let the frag shader determine the position/compmask for the
          *     varyings, since it is the place where we know if the varying
author	Connor Abbott <cwabbott0@gmail.com>
	Thu, 29 Oct 2020 14:05:24 +0000 (15:05 +0100)
committer	Connor Abbott <cwabbott0@gmail.com>
	Thu, 19 Nov 2020 16:55:03 +0000 (17:55 +0100)
src/freedreno/ir3/ir3.c		patch \| blob \| history
src/freedreno/ir3/ir3.h		patch \| blob \| history
src/freedreno/ir3/ir3_compiler_nir.c		patch \| blob \| history
src/freedreno/ir3/ir3_nir.c		patch \| blob \| history
src/freedreno/ir3/ir3_shader.c		patch \| blob \| history
src/freedreno/ir3/ir3_shader.h		patch \| blob \| history