freedreno/ir3: images can be arrays too
author Rob Clark <robdclark@gmail.com>
Sat, 2 Jun 2018 00:20:43 +0000 (20:20 -0400)
committer Rob Clark <robdclark@gmail.com>
Tue, 19 Jun 2018 17:02:28 +0000 (13:02 -0400)
Seems I previously totally forgot about 2d-arrays, etc.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a5xx/fd5_image.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c

index 6699375..a561643 100644 (file)
@@ -81,17 +81,43 @@ static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
                lvl = 0;
                img->offset = pimg->u.buf.offset;
                img->pitch  = pimg->u.buf.size;
-               img->array_pitch = 0;
        } else {
                lvl = pimg->u.tex.level;
                img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
                img->pitch  = rsc->slices[lvl].pitch * rsc->cpp;
-               img->array_pitch = rsc->layer_size;
        }
 
        img->width     = u_minify(prsc->width0, lvl);
        img->height    = u_minify(prsc->height0, lvl);
-       img->depth     = u_minify(prsc->depth0, lvl);
+
+       unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
+
+       switch (prsc->target) {
+       case PIPE_TEXTURE_RECT:
+       case PIPE_TEXTURE_1D:
+       case PIPE_TEXTURE_2D:
+               img->array_pitch = rsc->layer_size;
+               img->depth = 1;
+               break;
+       case PIPE_TEXTURE_1D_ARRAY:
+       case PIPE_TEXTURE_2D_ARRAY:
+               img->array_pitch = rsc->layer_size;
+               img->depth = layers;
+               break;
+       case PIPE_TEXTURE_CUBE:
+       case PIPE_TEXTURE_CUBE_ARRAY:
+               img->array_pitch = rsc->layer_size;
+               img->depth = layers / 6;
+               break;
+       case PIPE_TEXTURE_3D:
+               img->array_pitch = rsc->slices[lvl].size0;
+               img->depth = u_minify(prsc->depth0, lvl);
+               break;
+       default:
+               img->array_pitch = 0;
+               img->depth = 0;
+               break;
+       }
 }
 
 static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
index ed87eff..a14239b 100644 (file)
@@ -1849,26 +1849,46 @@ get_image_slot(struct ir3_context *ctx, const nir_variable *var)
        return max_samplers - var->data.driver_location - 1;
 }
 
+/* see tex_info() for equiv logic for texture instructions.. it would be
+ * nice if this could be better unified..
+ */
 static unsigned
-get_image_coords(const nir_variable *var)
+get_image_coords(const nir_variable *var, unsigned *flagsp)
 {
-       switch (glsl_get_sampler_dim(glsl_without_array(var->type))) {
+       const struct glsl_type *type = glsl_without_array(var->type);
+       unsigned coords, flags = 0;
+
+       switch (glsl_get_sampler_dim(type)) {
        case GLSL_SAMPLER_DIM_1D:
        case GLSL_SAMPLER_DIM_BUF:
-               return 1;
+               coords = 1;
                break;
        case GLSL_SAMPLER_DIM_2D:
        case GLSL_SAMPLER_DIM_RECT:
        case GLSL_SAMPLER_DIM_EXTERNAL:
        case GLSL_SAMPLER_DIM_MS:
-               return 2;
+               coords = 2;
+               break;
        case GLSL_SAMPLER_DIM_3D:
        case GLSL_SAMPLER_DIM_CUBE:
-               return 3;
+               flags |= IR3_INSTR_3D;
+               coords = 3;
+               break;
        default:
                unreachable("bad sampler dim");
                return 0;
        }
+
+       if (glsl_sampler_type_is_array(type)) {
+               /* note: unlike tex_info(), adjust # of coords to include array idx: */
+               coords++;
+               flags |= IR3_INSTR_A;
+       }
+
+       if (flagsp)
+               *flagsp = flags;
+
+       return coords;
 }
 
 static type_t
@@ -1893,7 +1913,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
 {
        struct ir3_block *b = ctx->block;
        struct ir3_instruction *offset;
-       unsigned ncoords = get_image_coords(var);
+       unsigned ncoords = get_image_coords(var, NULL);
 
        /* to calculate the byte offset (yes, uggg) we need (up to) three
         * const values to know the bytes per pixel, and y and z stride:
@@ -1940,13 +1960,9 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        const nir_variable *var = intr->variables[0]->var;
        struct ir3_instruction *sam;
        struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-       unsigned ncoords = get_image_coords(var);
+       unsigned flags, ncoords = get_image_coords(var, &flags);
        unsigned tex_idx = get_image_slot(ctx, var);
        type_t type = get_image_type(var);
-       unsigned flags = 0;
-
-       if (ncoords == 3)
-               flags |= IR3_INSTR_3D;
 
        sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags,
                        tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL);
@@ -1966,7 +1982,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        struct ir3_instruction *stib, *offset;
        struct ir3_instruction * const *value = get_src(ctx, &intr->src[2]);
        struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-       unsigned ncoords = get_image_coords(var);
+       unsigned ncoords = get_image_coords(var, NULL);
        unsigned tex_idx = get_image_slot(ctx, var);
 
        /* src0 is value
@@ -2001,19 +2017,38 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 {
        struct ir3_block *b = ctx->block;
        const nir_variable *var = intr->variables[0]->var;
-       unsigned ncoords = get_image_coords(var);
        unsigned tex_idx = get_image_slot(ctx, var);
        struct ir3_instruction *sam, *lod;
-       unsigned flags = 0;
-
-       if (ncoords == 3)
-               flags = IR3_INSTR_3D;
+       unsigned flags, ncoords = get_image_coords(var, &flags);
 
        lod = create_immed(b, 0);
        sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
                        tex_idx, tex_idx, lod, NULL);
 
-       split_dest(b, dst, sam, 0, ncoords);
+       /* Array size actually ends up in .w rather than .z. This doesn't
+        * matter for miplevel 0, but for higher mips the value in z is
+        * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
+        * returned, which means that we have to add 1 to it for arrays for
+        * a3xx.
+        *
+        * Note use a temporary dst and then copy, since the size of the dst
+        * array that is passed in is based on nir's understanding of the
+        * result size, not the hardware's
+        */
+       struct ir3_instruction *tmp[4];
+
+       split_dest(b, tmp, sam, 0, 4);
+
+       for (unsigned i = 0; i < ncoords; i++)
+               dst[i] = tmp[i];
+
+       if (flags & IR3_INSTR_A) {
+               if (ctx->levels_add_one) {
+                       dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0);
+               } else {
+                       dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32);
+               }
+       }
 }
 
 /* src[] = { coord, sample_index, value, compare }. const_index[] = {} */
@@ -2024,7 +2059,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        const nir_variable *var = intr->variables[0]->var;
        struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
        struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-       unsigned ncoords = get_image_coords(var);
+       unsigned ncoords = get_image_coords(var, NULL);
 
        image = create_immed(b, get_image_slot(ctx, var));