freedreno/ir3: images can be arrays too

author Rob Clark <robdclark@gmail.com>

Sat, 2 Jun 2018 00:20:43 +0000 (20:20 -0400)

committer Rob Clark <robdclark@gmail.com>

Tue, 19 Jun 2018 17:02:28 +0000 (13:02 -0400)
author Rob Clark <robdclark@gmail.com>
Sat, 2 Jun 2018 00:20:43 +0000 (20:20 -0400)
committer Rob Clark <robdclark@gmail.com>
Tue, 19 Jun 2018 17:02:28 +0000 (13:02 -0400)
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c

index 6699375..a561643 100644 (file)
--- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
@@ -81,17 +81,43 @@ static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
                 lvl = 0;
                 img->offset = pimg->u.buf.offset;
                 img->pitch  = pimg->u.buf.size;
-               img->array_pitch = 0;
         } else {
                 lvl = pimg->u.tex.level;
                 img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
                 img->pitch  = rsc->slices[lvl].pitch * rsc->cpp;
-               img->array_pitch = rsc->layer_size;
         }
  
         img->width     = u_minify(prsc->width0, lvl);
         img->height    = u_minify(prsc->height0, lvl);
-       img->depth     = u_minify(prsc->depth0, lvl);
+
+       unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
+
+       switch (prsc->target) {
+       case PIPE_TEXTURE_RECT:
+       case PIPE_TEXTURE_1D:
+       case PIPE_TEXTURE_2D:
+               img->array_pitch = rsc->layer_size;
+               img->depth = 1;
+               break;
+       case PIPE_TEXTURE_1D_ARRAY:
+       case PIPE_TEXTURE_2D_ARRAY:
+               img->array_pitch = rsc->layer_size;
+               img->depth = layers;
+               break;
+       case PIPE_TEXTURE_CUBE:
+       case PIPE_TEXTURE_CUBE_ARRAY:
+               img->array_pitch = rsc->layer_size;
+               img->depth = layers / 6;
+               break;
+       case PIPE_TEXTURE_3D:
+               img->array_pitch = rsc->slices[lvl].size0;
+               img->depth = u_minify(prsc->depth0, lvl);
+               break;
+       default:
+               img->array_pitch = 0;
+               img->depth = 0;
+               break;
+       }
  }
  
  static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c

index ed87eff..a14239b 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1849,26 +1849,46 @@ get_image_slot(struct ir3_context *ctx, const nir_variable *var)
         return max_samplers - var->data.driver_location - 1;
  }
  
+/* see tex_info() for equiv logic for texture instructions.. it would be
+ * nice if this could be better unified..
+ */
  static unsigned
-get_image_coords(const nir_variable *var)
+get_image_coords(const nir_variable *var, unsigned *flagsp)
  {
-       switch (glsl_get_sampler_dim(glsl_without_array(var->type))) {
+       const struct glsl_type *type = glsl_without_array(var->type);
+       unsigned coords, flags = 0;
+
+       switch (glsl_get_sampler_dim(type)) {
         case GLSL_SAMPLER_DIM_1D:
         case GLSL_SAMPLER_DIM_BUF:
-               return 1;
+               coords = 1;
                 break;
         case GLSL_SAMPLER_DIM_2D:
         case GLSL_SAMPLER_DIM_RECT:
         case GLSL_SAMPLER_DIM_EXTERNAL:
         case GLSL_SAMPLER_DIM_MS:
-               return 2;
+               coords = 2;
+               break;
         case GLSL_SAMPLER_DIM_3D:
         case GLSL_SAMPLER_DIM_CUBE:
-               return 3;
+               flags |= IR3_INSTR_3D;
+               coords = 3;
+               break;
         default:
                 unreachable("bad sampler dim");
                 return 0;
         }
+
+       if (glsl_sampler_type_is_array(type)) {
+               /* note: unlike tex_info(), adjust # of coords to include array idx: */
+               coords++;
+               flags |= IR3_INSTR_A;
+       }
+
+       if (flagsp)
+               *flagsp = flags;
+
+       return coords;
  }
  
  static type_t
@@ -1893,7 +1913,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
  {
         struct ir3_block *b = ctx->block;
         struct ir3_instruction *offset;
-       unsigned ncoords = get_image_coords(var);
+       unsigned ncoords = get_image_coords(var, NULL);
  
         /* to calculate the byte offset (yes, uggg) we need (up to) three
          * const values to know the bytes per pixel, and y and z stride:
@@ -1940,13 +1960,9 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
         const nir_variable *var = intr->variables[0]->var;
         struct ir3_instruction *sam;
         struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-       unsigned ncoords = get_image_coords(var);
+       unsigned flags, ncoords = get_image_coords(var, &flags);
         unsigned tex_idx = get_image_slot(ctx, var);
         type_t type = get_image_type(var);
-       unsigned flags = 0;
-
-       if (ncoords == 3)
-               flags |= IR3_INSTR_3D;
  
         sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags,
                         tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL);
@@ -1966,7 +1982,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
         struct ir3_instruction *stib, *offset;
         struct ir3_instruction * const *value = get_src(ctx, &intr->src[2]);
         struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-       unsigned ncoords = get_image_coords(var);
+       unsigned ncoords = get_image_coords(var, NULL);
         unsigned tex_idx = get_image_slot(ctx, var);
  
         /* src0 is value
@@ -2001,19 +2017,38 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
  {
         struct ir3_block *b = ctx->block;
         const nir_variable *var = intr->variables[0]->var;
-       unsigned ncoords = get_image_coords(var);
         unsigned tex_idx = get_image_slot(ctx, var);
         struct ir3_instruction *sam, *lod;
-       unsigned flags = 0;
-
-       if (ncoords == 3)
-               flags = IR3_INSTR_3D;
+       unsigned flags, ncoords = get_image_coords(var, &flags);
  
         lod = create_immed(b, 0);
         sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
                         tex_idx, tex_idx, lod, NULL);
  
-       split_dest(b, dst, sam, 0, ncoords);
+       /* Array size actually ends up in .w rather than .z. This doesn't
+        * matter for miplevel 0, but for higher mips the value in z is
+        * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
+        * returned, which means that we have to add 1 to it for arrays for
+        * a3xx.
+        *
+        * Note: use a temporary dst and then copy, since the size of the dst
+        * array that is passed in is based on nir's understanding of the
+        * result size, not the hardware's
+        */
+       struct ir3_instruction *tmp[4];
+
+       split_dest(b, tmp, sam, 0, 4);
+
+       for (unsigned i = 0; i < ncoords; i++)
+               dst[i] = tmp[i];
+
+       if (flags & IR3_INSTR_A) {
+               if (ctx->levels_add_one) {
+                       dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0);
+               } else {
+                       dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32);
+               }
+       }
  }
  
  /* src[] = { coord, sample_index, value, compare }. const_index[] = {} */
@@ -2024,7 +2059,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
         const nir_variable *var = intr->variables[0]->var;
         struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
         struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-       unsigned ncoords = get_image_coords(var);
+       unsigned ncoords = get_image_coords(var, NULL);
  
         image = create_immed(b, get_image_slot(ctx, var));
author	Rob Clark <robdclark@gmail.com>
	Sat, 2 Jun 2018 00:20:43 +0000 (20:20 -0400)
committer	Rob Clark <robdclark@gmail.com>
	Tue, 19 Jun 2018 17:02:28 +0000 (13:02 -0400)
src/gallium/drivers/freedreno/a5xx/fd5_image.c		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c		patch \| blob \| history