nvc0: fix 3d images

author Ilia Mirkin <imirkin@alum.mit.edu>

Sun, 16 May 2021 03:18:52 +0000 (23:18 -0400)

committer Marge Bot <eric+marge@anholt.net>

Sun, 6 Jun 2021 18:26:26 +0000 (18:26 +0000)
author Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 16 May 2021 03:18:52 +0000 (23:18 -0400)
committer Marge Bot <eric+marge@anholt.net>
Sun, 6 Jun 2021 18:26:26 +0000 (18:26 +0000)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

index e45fc08..83634f1 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2415,14 +2415,100 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
     // calculate pixel offset
     if (su->op == OP_SULDP || su->op == OP_SUREDP) {
        v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
-      su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
+      su->setSrc(0, (src[0] = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), src[0], v)));
     }
  
     // add array layer offset
     if (su->tex.target.isArray() || su->tex.target.isCube()) {
        v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
        assert(dim > 1);
-      su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
+      su->setSrc(2, (src[2] = bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)));
+   }
+
+   // 3d is special-cased. Note that a single "slice" of a 3d image may
+   // also be attached as 2d, so we have to do the same 3d processing for
+   // 2d as well, just in case. In order to remap a 3d image onto a 2d
+   // image, we have to retile it "by hand".
+   if (su->tex.target == TEX_TARGET_3D || su->tex.target == TEX_TARGET_2D) {
+      Value *z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
+      Value *y_size_aligned =
+         bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(),
+                    loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM_Y, su->tex.bindless),
+                    bld.loadImm(NULL, 0x0000ffff));
+      // Add the z coordinate for actual 3d-images
+      if (dim > 2)
+         src[2] = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), z, src[2]);
+      else
+         src[2] = z;
+
+      // Compute the surface parameters from tile shifts
+      Value *tile_shift[3];
+      Value *tile_extbf[3];
+      // Fetch the "real" tiling parameters of the underlying surface
+      for (int i = 0; i < 3; i++) {
+         tile_extbf[i] =
+            bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(),
+                       loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(i), su->tex.bindless),
+                       bld.loadImm(NULL, 16));
+         tile_shift[i] =
+            bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(),
+                       loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(i), su->tex.bindless),
+                       bld.loadImm(NULL, 24));
+      }
+
+      // However for load/atomics, we use byte-indexing. And for byte
+      // indexing, the X tile size is always the same. This leads to slightly
+      // better code.
+      if (su->op == OP_SULDP || su->op == OP_SUREDP) {
+         tile_extbf[0] = bld.loadImm(NULL, 0x600);
+         tile_shift[0] = bld.loadImm(NULL, 6);
+      }
+
+      // Compute the location of given coordinate, both inside the tile as
+      // well as which (linearly-laid out) tile it's in.
+      Value *coord_in_tile[3];
+      Value *tile[3];
+      for (int i = 0; i < 3; i++) {
+         coord_in_tile[i] = bld.mkOp2v(OP_EXTBF, TYPE_U32, bld.getSSA(), src[i], tile_extbf[i]);
+         tile[i] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), src[i], tile_shift[i]);
+      }
+
+      // Based on the "real" tiling parameters, compute x/y coordinates in the
+      // larger surface with 2d tiling that was supplied to the hardware. This
+      // was determined and verified with the help of the tiling pseudocode in
+      // the envytools docs.
+      //
+      // adj_x = x_coord_in_tile + x_tile * x_tile_size * z_tile_size +
+      //         z_coord_in_tile * x_tile_size
+      // adj_y = y_coord_in_tile + y_tile * y_tile_size +
+      //         z_tile * y_tile_size * y_tiles
+      //
+      // Note: STRIDE_Y = y_tile_size * y_tiles
+
+      su->setSrc(0, bld.mkOp2v(
+            OP_ADD, TYPE_U32, bld.getSSA(),
+            bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[0],
+                       bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                  tile[0],
+                                  bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                                             tile_shift[2], tile_shift[0]))),
+            bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[2], tile_shift[0])));
+
+      su->setSrc(1, bld.mkOp2v(
+            OP_ADD, TYPE_U32, bld.getSSA(),
+            bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(),
+                       tile[2], y_size_aligned),
+            bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[1],
+                       bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                  tile[1], tile_shift[1]))));
+
+      if (su->tex.target == TEX_TARGET_3D) {
+         su->moveSources(3, -1);
+         su->tex.target = TEX_TARGET_2D;
+      }
     }
  
     // prevent read fault when the image is not actually bound
@@ -2438,7 +2524,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
        assert(format->components != 0);
        // make sure that the format doesn't mismatch when it's not FMT_NONE
        bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
-                TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+                TYPE_U32, bld.loadImm(NULL, ffs(blockwidth / 8) - 1),
                  loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                  pred->getDef(0));
     }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c

index c574169..a9b475e 100644 (file)
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -1102,19 +1102,27 @@ nvc0_set_surface_info(struct nouveau_pushbuf *push,
  
     /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
      * offset and to check if the format doesn't mismatch. */
-   info[12] = util_format_get_blocksize(view->format);
+   info[12] = ffs(util_format_get_blocksize(view->format)) - 1;
  
     if (res->base.target == PIPE_BUFFER) {
        info[0]  = address >> 8;
        info[2]  = width;
     } else {
        struct nv50_miptree *mt = nv50_miptree(&res->base);
+      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
+      unsigned z = mt->layout_3d ? view->u.tex.first_layer : 0;
+      unsigned nby = align(util_format_get_nblocksy(view->format, height),
+                           NVC0_TILE_SIZE_Y(lvl->tile_mode));
  
+      /* NOTE: this does not precisely match nve4; the values are made to be
+       * easier for the shader to consume.
+       */
        info[0]  = address >> 8;
-      info[2]  = width;
-      info[4]  = height;
+      info[2]  = (NVC0_TILE_SHIFT_X(lvl->tile_mode) - info[12]) << 24;
+      info[4]  = NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 24 | nby;
        info[5]  = mt->layer_stride >> 8;
-      info[6]  = depth;
+      info[6]  = NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 24;
+      info[7]  = z;
        info[14] = mt->ms_x;
        info[15] = mt->ms_y;
     }
@@ -1167,24 +1175,31 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
           } else {
              struct nv50_miptree *mt = nv50_miptree(view->resource);
              struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
-            const unsigned z = view->u.tex.first_layer;
+            unsigned adjusted_width = width, adjusted_height = height;
  
              if (mt->layout_3d) {
-               address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
-               if (depth >= 1) {
-                  pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
-                                     "3D images are not supported!");
-                  debug_printf("3D images are not supported!\n");
-               }
+               // We have to adjust the size of the 3d surface to be
+               // accessible within 2d limits. The size of each z tile goes
+               // into the x direction, while the number of z tiles goes into
+               // the y direction.
+               const unsigned nbx = util_format_get_nblocksx(view->format, width);
+               const unsigned nby = util_format_get_nblocksy(view->format, height);
+               const unsigned tsx = NVC0_TILE_SIZE_X(lvl->tile_mode);
+               const unsigned tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
+               const unsigned tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);
+
+               adjusted_width = align(nbx, tsx / util_format_get_blocksize(view->format)) * tsz;
+               adjusted_height = align(nby, tsy) * align(depth, tsz) >> NVC0_TILE_SHIFT_Z(lvl->tile_mode);
              } else {
+               const unsigned z = view->u.tex.first_layer;
                 address += mt->layer_stride * z;
              }
              address += lvl->offset;
  
              PUSH_DATAh(push, address);
              PUSH_DATA (push, address);
-            PUSH_DATA (push, width << mt->ms_x);
-            PUSH_DATA (push, height << mt->ms_y);
+            PUSH_DATA (push, adjusted_width << mt->ms_x);
+            PUSH_DATA (push, adjusted_height << mt->ms_y);
              PUSH_DATA (push, rt);
              PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
           }
author	Ilia Mirkin <imirkin@alum.mit.edu>
	Sun, 16 May 2021 03:18:52 +0000 (23:18 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Sun, 6 Jun 2021 18:26:26 +0000 (18:26 +0000)
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp		patch | blob | history
src/gallium/drivers/nouveau/nvc0/nvc0_tex.c		patch | blob | history