freedreno/ir3: Add support for load_kernel_input

author Rob Clark <robdclark@chromium.org>

Sun, 8 Aug 2021 20:31:54 +0000 (13:31 -0700)

committer Marge Bot <eric+marge@anholt.net>

Thu, 21 Oct 2021 18:59:57 +0000 (18:59 +0000)
author Rob Clark <robdclark@chromium.org>
Sun, 8 Aug 2021 20:31:54 +0000 (13:31 -0700)
committer Marge Bot <eric+marge@anholt.net>
Thu, 21 Oct 2021 18:59:57 +0000 (18:59 +0000)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c

index 46d8996..49ef01f 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -851,6 +851,41 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
     }
  }
  
+/*
+ * Load a kernel param: src[] = { address }.
+ *
+ * The param position is the intrinsic's base plus src[0], both in bytes.
+ * Kernel params live in the const file starting at
+ * const_state->offsets.kernel_params, so every access (direct or indirect)
+ * must be relative to that base.
+ *
+ * Only dst[0] is written, ie. a single 32b value is loaded (see the TODO
+ * below about non-32b inputs).
+ */
+static void
+emit_intrinsic_load_kernel_input(struct ir3_context *ctx,
+                                 nir_intrinsic_instr *intr,
+                                 struct ir3_instruction **dst)
+{
+   const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
+   struct ir3_block *b = ctx->block;
+   unsigned offset = nir_intrinsic_base(intr);
+   /* first scalar const register of the kernel params region: */
+   unsigned p = regid(const_state->offsets.kernel_params, 0);
+
+   struct ir3_instruction *src0 = ir3_get_src(ctx, &intr->src[0])[0];
+
+   if (is_same_type_mov(src0) && (src0->srcs[0]->flags & IR3_REG_IMMED)) {
+      /* address is a known immediate, fold it into the const index: */
+      offset += src0->srcs[0]->iim_val;
+
+      /* kernel param position is in bytes, but constant space is 32b registers: */
+      compile_assert(ctx, !(offset & 0x3));
+
+      dst[0] = create_uniform(b, p + (offset / 4));
+   } else {
+      /* kernel param position is in bytes, but constant space is 32b registers: */
+      compile_assert(ctx, !(offset & 0x3));
+
+      /* TODO we should probably be lowering this in nir, and also handling
+       * non-32b inputs.. Also we probably don't want to be using
+       * SP_MODE_CONTROL.CONSTANT_DEMOTION_ENABLE for KERNEL shaders..
+       */
+      /* convert the dynamic byte address into a 32b register index: */
+      src0 = ir3_SHR_B(b, src0, 0, create_immed(b, 2), 0);
+
+      /* The relative access must start at the kernel params base, same as
+       * the direct case above, otherwise we'd read whatever consts happen
+       * to sit at the start of the const file:
+       */
+      dst[0] = create_uniform_indirect(b, p + (offset / 4), TYPE_U32,
+                                       ir3_get_addr0(ctx, src0, 1));
+   }
+}
+
  /* src[] = { block_index } */
  static void
  emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
@@ -1801,6 +1836,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
     case nir_intrinsic_load_input:
        setup_input(ctx, intr);
        break;
+   case nir_intrinsic_load_kernel_input:
+      emit_intrinsic_load_kernel_input(ctx, intr, dst);
+      break;
     /* All SSBO intrinsics should have been lowered by 'lower_io_offsets'
      * pass and replaced by an ir3-specifc version that adds the
      * dword-offset in the last source.
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c

index 85f5048..30c07ae 100644 (file)
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -888,6 +888,11 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
        constoff += align(cnt, 4) / 4;
     }
  
+   if (v->type == MESA_SHADER_KERNEL) {
+      const_state->offsets.kernel_params = constoff;
+      constoff += align(v->shader->cs.req_input_mem, 4) / 4;
+   }
+
     if (const_state->num_driver_params > 0) {
        /* num_driver_params in dwords.  we only need to align to vec4s for the
         * common case of immediate constant uploads, but for indirect dispatch
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h

index 6191ab8..d135dcc 100644 (file)
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -146,12 +146,14 @@ struct ir3_ubo_analysis_state {
   *    user consts
   *    UBO addresses
   *    SSBO sizes
+ *    image dimensions
   *    if (vertex shader) {
- *        driver params (IR3_DP_*)
+ *        driver params (IR3_DP_VS_COUNT)
   *        if (stream_output.num_outputs > 0)
   *           stream-out addresses
   *    } else if (compute_shader) {
- *        driver params (IR3_DP_*)
+ *        kernel params
+ *        driver params (IR3_DP_CS_COUNT)
   *    }
   *    immediates
   *
@@ -171,6 +173,7 @@ struct ir3_const_state {
        /* user const start at zero */
        unsigned ubo;
        unsigned image_dims;
+      unsigned kernel_params;
        unsigned driver_param;
        unsigned tfbo;
        unsigned primitive_param;
@@ -740,6 +743,14 @@ struct ir3_shader {
     struct nir_shader *nir;
     struct ir3_stream_output_info stream_output;
  
+   /* per shader stage specific info: */
+   union {
+      /* for compute shaders: */
+      struct {
+         unsigned req_input_mem;    /* in dwords */
+      } cs;
+   };
+
     struct ir3_shader_variant *variants;
     mtx_t variants_lock;
  
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h

index 8186552..c2c239e 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_const.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h
@@ -433,6 +433,22 @@ emit_common_consts(const struct ir3_shader_variant *v,
     }
  }
  
+/*
+ * Emit the kernel params: upload info->input into the const file at the
+ * kernel_params offset recorded in the variant's const state.  Nothing is
+ * emitted if the variant's constlen does not reach past that offset.
+ */
+static inline void
+emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
+                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
+   assert_dt
+{
+   const uint32_t base = ir3_const_state(v)->offsets.kernel_params;
+
+   /* the variant's consts end before the kernel params start: */
+   if (v->constlen <= base)
+      return;
+
+   ring_wfi(ctx->batch, ring);
+
+   /* req_input_mem is in dwords and gets padded to a multiple of 4 here.
+    * NOTE(review): the padded size can exceed what the state tracker
+    * reported as req_input_mem in bytes -- confirm that info->input is
+    * large enough to be read that far.
+    */
+   emit_const_user(ring, v, base * 4,
+                   align(v->shader->cs.req_input_mem, 4),
+                   info->input);
+}
+
  static inline void
  ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
                            struct fd_ringbuffer *ring, struct fd_context *ctx,
@@ -552,6 +568,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v,
     debug_assert(gl_shader_stage_is_compute(v->type));
  
     emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
+   emit_kernel_params(ctx, v, ring, info);
  
     /* emit compute-shader driver-params: */
     const struct ir3_const_state *const_state = ir3_const_state(v);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c

index 701fc47..041ba15 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -297,6 +297,8 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
     }
  
     struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, 0, NULL);
+   shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4;     /* byte->dword */
+
     struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
  
     util_queue_fence_init(&hwcso->ready);
author	Rob Clark <robdclark@chromium.org>
	Sun, 8 Aug 2021 20:31:54 +0000 (13:31 -0700)
committer	Marge Bot <eric+marge@anholt.net>
	Thu, 21 Oct 2021 18:59:57 +0000 (18:59 +0000)
src/freedreno/ir3/ir3_compiler_nir.c		patch \| blob \| history
src/freedreno/ir3/ir3_nir.c		patch \| blob \| history
src/freedreno/ir3/ir3_shader.h		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_const.h		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_gallium.c		patch \| blob \| history