nvc0: fix 3d images
authorIlia Mirkin <imirkin@alum.mit.edu>
Sun, 16 May 2021 03:18:52 +0000 (23:18 -0400)
committerMarge Bot <eric+marge@anholt.net>
Sun, 6 Jun 2021 18:26:26 +0000 (18:26 +0000)
The hardware has no support for 3d image loads/stores. So present the
image as a larger 2d image and fudge the coordinates. Note that a 2d
image (in the shader) may be backed by a slice of a 3d image, so we
always have to do the coordinate adjustments for 2d as well.

This is largely copied from the nv50 support, which has the same
restriction, with extra care taken to differentiate loads (which
specifies the X coordinate in bytes) and stores, which specifies it in
(formatted) pixels.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10820>

src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
src/gallium/drivers/nouveau/nvc0/nvc0_tex.c

index e45fc08..83634f1 100644 (file)
@@ -2415,14 +2415,100 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
    // calculate pixel offset
    if (su->op == OP_SULDP || su->op == OP_SUREDP) {
       v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
-      su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
+      su->setSrc(0, (src[0] = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), src[0], v)));
    }
 
    // add array layer offset
    if (su->tex.target.isArray() || su->tex.target.isCube()) {
       v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
       assert(dim > 1);
-      su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
+      su->setSrc(2, (src[2] = bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)));
+   }
+
+   // 3d is special-cased. Note that a single "slice" of a 3d image may
+   // also be attached as 2d, so we have to do the same 3d processing for
+   // 2d as well, just in case. In order to remap a 3d image onto a 2d
+   // image, we have to retile it "by hand".
+   if (su->tex.target == TEX_TARGET_3D || su->tex.target == TEX_TARGET_2D) {
+      Value *z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
+      Value *y_size_aligned =
+         bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(),
+                    loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM_Y, su->tex.bindless),
+                    bld.loadImm(NULL, 0x0000ffff));
+      // Add the z coordinate for actual 3d-images
+      if (dim > 2)
+         src[2] = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), z, src[2]);
+      else
+         src[2] = z;
+
+      // Compute the surface parameters from tile shifts
+      Value *tile_shift[3];
+      Value *tile_extbf[3];
+      // Fetch the "real" tiling parameters of the underlying surface
+      for (int i = 0; i < 3; i++) {
+         tile_extbf[i] =
+            bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(),
+                       loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(i), su->tex.bindless),
+                       bld.loadImm(NULL, 16));
+         tile_shift[i] =
+            bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(),
+                       loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(i), su->tex.bindless),
+                       bld.loadImm(NULL, 24));
+      }
+
+      // However for load/atomics, we use byte-indexing. And for byte
+      // indexing, the X tile size is always the same. This leads to slightly
+      // better code.
+      if (su->op == OP_SULDP || su->op == OP_SUREDP) {
+         tile_extbf[0] = bld.loadImm(NULL, 0x600);
+         tile_shift[0] = bld.loadImm(NULL, 6);
+      }
+
+      // Compute the location of given coordinate, both inside the tile as
+      // well as which (linearly-laid out) tile it's in.
+      Value *coord_in_tile[3];
+      Value *tile[3];
+      for (int i = 0; i < 3; i++) {
+         coord_in_tile[i] = bld.mkOp2v(OP_EXTBF, TYPE_U32, bld.getSSA(), src[i], tile_extbf[i]);
+         tile[i] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), src[i], tile_shift[i]);
+      }
+
+      // Based on the "real" tiling parameters, compute x/y coordinates in the
+      // larger surface with 2d tiling that was supplied to the hardware. This
+      // was determined and verified with the help of the tiling pseudocode in
+      // the envytools docs.
+      //
+      // adj_x = x_coord_in_tile + x_tile * x_tile_size * z_tile_size +
+      //         z_coord_in_tile * x_tile_size
+      // adj_y = y_coord_in_tile + y_tile * y_tile_size +
+      //         z_tile * y_tile_size * y_tiles
+      //
+      // Note: STRIDE_Y = y_tile_size * y_tiles
+
+      su->setSrc(0, bld.mkOp2v(
+            OP_ADD, TYPE_U32, bld.getSSA(),
+            bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[0],
+                       bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                  tile[0],
+                                  bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                                             tile_shift[2], tile_shift[0]))),
+            bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[2], tile_shift[0])));
+
+      su->setSrc(1, bld.mkOp2v(
+            OP_ADD, TYPE_U32, bld.getSSA(),
+            bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(),
+                       tile[2], y_size_aligned),
+            bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[1],
+                       bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                  tile[1], tile_shift[1]))));
+
+      if (su->tex.target == TEX_TARGET_3D) {
+         su->moveSources(3, -1);
+         su->tex.target = TEX_TARGET_2D;
+      }
    }
 
    // prevent read fault when the image is not actually bound
@@ -2438,7 +2524,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
       assert(format->components != 0);
       // make sure that the format doesn't mismatch when it's not FMT_NONE
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
-                TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+                TYPE_U32, bld.loadImm(NULL, ffs(blockwidth / 8) - 1),
                 loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred->getDef(0));
    }
index c574169..a9b475e 100644 (file)
@@ -1102,19 +1102,27 @@ nvc0_set_surface_info(struct nouveau_pushbuf *push,
 
    /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
     * offset and to check if the format doesn't mismatch. */
-   info[12] = util_format_get_blocksize(view->format);
+   info[12] = ffs(util_format_get_blocksize(view->format)) - 1;
 
    if (res->base.target == PIPE_BUFFER) {
       info[0]  = address >> 8;
       info[2]  = width;
    } else {
       struct nv50_miptree *mt = nv50_miptree(&res->base);
+      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
+      unsigned z = mt->layout_3d ? view->u.tex.first_layer : 0;
+      unsigned nby = align(util_format_get_nblocksy(view->format, height),
+                           NVC0_TILE_SIZE_Y(lvl->tile_mode));
 
+      /* NOTE: this does not precisely match nve4; the values are made to be
+       * easier for the shader to consume.
+       */
       info[0]  = address >> 8;
-      info[2]  = width;
-      info[4]  = height;
+      info[2]  = (NVC0_TILE_SHIFT_X(lvl->tile_mode) - info[12]) << 24;
+      info[4]  = NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 24 | nby;
       info[5]  = mt->layer_stride >> 8;
-      info[6]  = depth;
+      info[6]  = NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 24;
+      info[7]  = z;
       info[14] = mt->ms_x;
       info[15] = mt->ms_y;
    }
@@ -1167,24 +1175,31 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
          } else {
             struct nv50_miptree *mt = nv50_miptree(view->resource);
             struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
-            const unsigned z = view->u.tex.first_layer;
+            unsigned adjusted_width = width, adjusted_height = height;
 
             if (mt->layout_3d) {
-               address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
-               if (depth >= 1) {
-                  pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
-                                     "3D images are not supported!");
-                  debug_printf("3D images are not supported!\n");
-               }
+               // We have to adjust the size of the 3d surface to be
+               // accessible within 2d limits. The size of each z tile goes
+               // into the x direction, while the number of z tiles goes into
+               // the y direction.
+               const unsigned nbx = util_format_get_nblocksx(view->format, width);
+               const unsigned nby = util_format_get_nblocksy(view->format, height);
+               const unsigned tsx = NVC0_TILE_SIZE_X(lvl->tile_mode);
+               const unsigned tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
+               const unsigned tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);
+
+               adjusted_width = align(nbx, tsx / util_format_get_blocksize(view->format)) * tsz;
+               adjusted_height = align(nby, tsy) * align(depth, tsz) >> NVC0_TILE_SHIFT_Z(lvl->tile_mode);
             } else {
+               const unsigned z = view->u.tex.first_layer;
                address += mt->layer_stride * z;
             }
             address += lvl->offset;
 
             PUSH_DATAh(push, address);
             PUSH_DATA (push, address);
-            PUSH_DATA (push, width << mt->ms_x);
-            PUSH_DATA (push, height << mt->ms_y);
+            PUSH_DATA (push, adjusted_width << mt->ms_x);
+            PUSH_DATA (push, adjusted_height << mt->ms_y);
             PUSH_DATA (push, rt);
             PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
          }