nvk: Manually offset for array layers in copy/blit
author: Faith Ekstrand <faith.ekstrand@collabora.com>
Tue, 31 Jan 2023 02:11:49 +0000 (20:11 -0600)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 4 Aug 2023 21:31:54 +0000 (21:31 +0000)
The more I stare at the hardware the more convinced I become that arrays
and 3D are not the same thing.  I'm sure they're similar enough in
certain circumstances but they don't appear to be in the way they're
treated in the 2D hardware, especially when miplevels are involved.
Instead of trying to use the 2D engine's depth/slice logic, offset
manually.  We have to emit a copy command for each slice anyway so
making the GPU do those calculations doesn't really gain us anything.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>

src/nouveau/vulkan/nvk_cmd_blit.c
src/nouveau/vulkan/nvk_cmd_copy.c

index 5da50b6..ff75488 100644 (file)
@@ -72,18 +72,6 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
       struct nvk_image_level *src_level = &src->level[region->srcSubresource.mipLevel];
       struct nvk_image_level *dst_level = &dst->level[region->dstSubresource.mipLevel];
 
-      VkDeviceSize src_addr = nvk_image_base_address(src, region->srcSubresource.mipLevel);
-      VkDeviceSize dst_addr = nvk_image_base_address(dst, region->dstSubresource.mipLevel);
-
-      /* we can't select the src layer, so we need to offset manually
-       * Also, this is completely safe as we don't tile over array layers contrary to the depth
-       * of a 3d image.
-       */
-      src_addr += region->srcSubresource.baseArrayLayer * src_level->layer_stride;
-
-      uint32_t src_depth = src_level->extent.depth * src->vk.array_layers;
-      uint32_t dst_depth = dst_level->extent.depth * dst->vk.array_layers;
-
       unsigned x_i = region->dstOffsets[0].x < region->dstOffsets[1].x ? 0 : 1;
       unsigned y_i = region->dstOffsets[0].y < region->dstOffsets[1].y ? 0 : 1;
 
@@ -124,7 +112,7 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
       }
 
       P_MTHD(push, NV902D, SET_SRC_DEPTH);
-      P_NV902D_SET_SRC_DEPTH(push, src_depth);
+      P_NV902D_SET_SRC_DEPTH(push, src_level->extent.depth);
 
       P_MTHD(push, NV902D, SET_SRC_PITCH);
       P_NV902D_SET_SRC_PITCH(push, src_level->row_stride);
@@ -143,14 +131,12 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
       }
 
       P_MTHD(push, NV902D, SET_DST_DEPTH);
-      P_NV902D_SET_DST_DEPTH(push, dst_depth);
+      P_NV902D_SET_DST_DEPTH(push, dst_level->extent.depth);
 
       P_MTHD(push, NV902D, SET_DST_PITCH);
       P_NV902D_SET_DST_PITCH(push, dst_level->row_stride);
       P_NV902D_SET_DST_WIDTH(push, dst_level->extent.width);
       P_NV902D_SET_DST_HEIGHT(push, dst_level->extent.height);
-      P_NV902D_SET_DST_OFFSET_UPPER(push, dst_addr >> 32);
-      P_NV902D_SET_DST_OFFSET_LOWER(push, dst_addr & 0xffffffff);
 
       P_MTHD(push, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0);
       P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(push, dst_start_x);
@@ -165,21 +151,30 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
       P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_INT(push, src_start_x_fp >> 32);
       P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC(push, src_start_y_fp & 0xffffffff);
 
-      /* we can select the dst but not the src layer... */
-      for (unsigned z = 0; z < region->srcSubresource.layerCount; z++) {
+      assert(src->vk.image_type != VK_IMAGE_TYPE_3D);
+      assert(dst->vk.image_type != VK_IMAGE_TYPE_3D);
+      for (unsigned w = 0; w < region->srcSubresource.layerCount; w++) {
+         VkDeviceSize src_addr = nvk_image_base_address(src, region->srcSubresource.mipLevel);
+         VkDeviceSize dst_addr = nvk_image_base_address(dst, region->dstSubresource.mipLevel);
+
+         src_addr += (w + region->srcSubresource.baseArrayLayer) *
+                     src_level->layer_stride;
+         dst_addr += (w + region->dstSubresource.baseArrayLayer) *
+                     dst_level->layer_stride;
+
          P_MTHD(push, NV902D, SET_SRC_OFFSET_UPPER);
          P_NV902D_SET_SRC_OFFSET_UPPER(push, src_addr >> 32);
          P_NV902D_SET_SRC_OFFSET_LOWER(push, src_addr & 0xffffffff);
 
+         P_MTHD(push, NV902D, SET_DST_OFFSET_UPPER);
+         P_NV902D_SET_DST_OFFSET_UPPER(push, dst_addr >> 32);
+         P_NV902D_SET_DST_OFFSET_LOWER(push, dst_addr & 0xffffffff);
+
          P_MTHD(push, NV902D, SET_DST_LAYER);
-         P_NV902D_SET_DST_LAYER(push, z + region->dstSubresource.baseArrayLayer);
+         P_NV902D_SET_DST_LAYER(push, 0);
 
          P_MTHD(push, NV902D, PIXELS_FROM_MEMORY_SRC_Y0_INT);
          P_NV902D_PIXELS_FROM_MEMORY_SRC_Y0_INT(push, src_start_y_fp >> 32);
-
-         /* this works only if there is no tiling on z */
-         assert(!src_level->tile.z);
-         src_addr += src_level->layer_stride;
       }
    }
 }
index c98035f..d89868b 100644 (file)
@@ -50,6 +50,7 @@ nouveau_copy_linear(struct nouveau_ws_push *push,
 struct nouveau_copy_buffer {
    uint64_t base_addr;
    VkOffset3D offset;
+   uint32_t base_array_layer;
    VkExtent3D extent;
    uint32_t row_stride;
    uint32_t layer_stride;
@@ -61,6 +62,7 @@ struct nouveau_copy {
    struct nouveau_copy_buffer dst;
    uint32_t bpp;
    VkExtent3D extent;
+   uint32_t layer_count;
 };
 
 static struct nouveau_copy_buffer
@@ -88,122 +90,121 @@ nouveau_copy_rect_image(
       .base_addr = nvk_image_base_address(img, sub_res->mipLevel),
       .offset = vk_image_sanitize_offset(&img->vk, offset),
       .extent = level->extent,
+      .base_array_layer = sub_res->baseArrayLayer,
       .row_stride = level->row_stride,
       .layer_stride = level->layer_stride,
       .tile = level->tile,
    };
 
-   buf.extent.depth *= img->vk.array_layers;
-   buf.offset.z += sub_res->baseArrayLayer;
-
    return buf;
 }
 
 static void
 nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
 {
-   VkDeviceSize src_addr = copy->src.base_addr;
-   VkDeviceSize dst_addr = copy->dst.base_addr;
-
-   if (!copy->src.tile.is_tiled) {
-      src_addr +=
-         copy->src.offset.x * copy->bpp +
-         copy->src.offset.y * copy->src.row_stride +
-         copy->src.offset.z * copy->src.layer_stride;
-   }
+   struct nouveau_ws_push *push = cmd->push;
 
-   if (!copy->dst.tile.is_tiled) {
-      dst_addr +=
-         copy->dst.offset.x * copy->bpp +
-         copy->dst.offset.y * copy->dst.row_stride +
-         copy->dst.offset.z * copy->dst.layer_stride;
-   }
+   for (unsigned w = 0; w < copy->layer_count; w++) {
+      VkDeviceSize src_addr = copy->src.base_addr;
+      VkDeviceSize dst_addr = copy->dst.base_addr;
 
-   struct nouveau_ws_push *push = cmd->push;
+      src_addr += (w + copy->src.base_array_layer) * copy->src.layer_stride;
+      dst_addr += (w + copy->dst.base_array_layer) * copy->dst.layer_stride;
 
-   for (unsigned z = 0; z < copy->extent.depth; z++) {
-      P_MTHD(push, NV90B5, OFFSET_IN_UPPER);
-      P_NV90B5_OFFSET_IN_UPPER(push, src_addr >> 32);
-      P_NV90B5_OFFSET_IN_LOWER(push, src_addr & 0xffffffff);
-      P_NV90B5_OFFSET_OUT_UPPER(push, dst_addr >> 32);
-      P_NV90B5_OFFSET_OUT_LOWER(push, dst_addr & 0xfffffff);
-      P_NV90B5_PITCH_IN(push, copy->src.row_stride);
-      P_NV90B5_PITCH_OUT(push, copy->dst.row_stride);
-      P_NV90B5_LINE_LENGTH_IN(push, copy->extent.width * copy->bpp);
-      P_NV90B5_LINE_COUNT(push, copy->extent.height);
-
-      uint32_t src_layout = 0, dst_layout = 0;
-      if (copy->src.tile.is_tiled) {
-         assert(copy->src.tile.is_fermi);
-         P_MTHD(push, NV90B5, SET_SRC_BLOCK_SIZE);
-         P_NV90B5_SET_SRC_BLOCK_SIZE(push, {
-            .width = copy->src.tile.x,
-            .height = copy->src.tile.y,
-            .depth = copy->src.tile.z,
-            .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
-         });
-         P_NV90B5_SET_SRC_WIDTH(push, copy->src.extent.width * copy->bpp);
-         P_NV90B5_SET_SRC_HEIGHT(push, copy->src.extent.height);
-         P_NV90B5_SET_SRC_DEPTH(push, copy->src.extent.depth);
-         P_NV90B5_SET_SRC_LAYER(push, z + copy->src.offset.z);
-
-         if (cmd->pool->dev->pdev->dev->cls >= 0xc1) {
-            P_MTHD(push, NVC1B5, SRC_ORIGIN_X);
-            P_NVC1B5_SRC_ORIGIN_X(push, copy->src.offset.x * copy->bpp);
-            P_NVC1B5_SRC_ORIGIN_Y(push, copy->src.offset.y);
-         } else {
-            P_MTHD(push, NV90B5, SET_SRC_ORIGIN);
-            P_NV90B5_SET_SRC_ORIGIN(push, {
-               .x = copy->src.offset.x * copy->bpp,
-               .y = copy->src.offset.y
-            });
-         }
+      if (!copy->src.tile.is_tiled) {
+         src_addr += copy->src.offset.x * copy->bpp +
+                     copy->src.offset.y * copy->src.row_stride;
+      }
 
-         src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
-      } else {
-         src_addr += copy->src.layer_stride;
-         src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
+      if (!copy->dst.tile.is_tiled) {
+         dst_addr += copy->dst.offset.x * copy->bpp +
+                     copy->dst.offset.y * copy->dst.row_stride;
       }
 
-      if (copy->dst.tile.is_tiled) {
-         assert(copy->dst.tile.is_fermi);
-         P_MTHD(push, NV90B5, SET_DST_BLOCK_SIZE);
-         P_NV90B5_SET_DST_BLOCK_SIZE(push, {
-            .width = copy->dst.tile.x,
-            .height = copy->dst.tile.y,
-            .depth = copy->dst.tile.z,
-            .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
-         });
-         P_NV90B5_SET_DST_WIDTH(push, copy->dst.extent.width * copy->bpp);
-         P_NV90B5_SET_DST_HEIGHT(push, copy->dst.extent.height);
-         P_NV90B5_SET_DST_DEPTH(push, copy->dst.extent.depth);
-         P_NV90B5_SET_DST_LAYER(push, z + copy->dst.offset.z);
-
-         if (cmd->pool->dev->pdev->dev->cls >= 0xc1) {
-            P_MTHD(push, NVC1B5, DST_ORIGIN_X);
-            P_NVC1B5_DST_ORIGIN_X(push, copy->dst.offset.x * copy->bpp);
-            P_NVC1B5_DST_ORIGIN_Y(push, copy->dst.offset.y);
+      for (unsigned z = 0; z < copy->extent.depth; z++) {
+         P_MTHD(push, NV90B5, OFFSET_IN_UPPER);
+         P_NV90B5_OFFSET_IN_UPPER(push, src_addr >> 32);
+         P_NV90B5_OFFSET_IN_LOWER(push, src_addr & 0xffffffff);
+         P_NV90B5_OFFSET_OUT_UPPER(push, dst_addr >> 32);
+         P_NV90B5_OFFSET_OUT_LOWER(push, dst_addr & 0xfffffff);
+         P_NV90B5_PITCH_IN(push, copy->src.row_stride);
+         P_NV90B5_PITCH_OUT(push, copy->dst.row_stride);
+         P_NV90B5_LINE_LENGTH_IN(push, copy->extent.width * copy->bpp);
+         P_NV90B5_LINE_COUNT(push, copy->extent.height);
+
+         uint32_t src_layout = 0, dst_layout = 0;
+         if (copy->src.tile.is_tiled) {
+            assert(copy->src.tile.is_fermi);
+            P_MTHD(push, NV90B5, SET_SRC_BLOCK_SIZE);
+            P_NV90B5_SET_SRC_BLOCK_SIZE(push, {
+               .width = copy->src.tile.x,
+               .height = copy->src.tile.y,
+               .depth = copy->src.tile.z,
+               .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
+            });
+            P_NV90B5_SET_SRC_WIDTH(push, copy->src.extent.width * copy->bpp);
+            P_NV90B5_SET_SRC_HEIGHT(push, copy->src.extent.height);
+            P_NV90B5_SET_SRC_DEPTH(push, copy->src.extent.depth);
+            P_NV90B5_SET_SRC_LAYER(push, z + copy->src.offset.z);
+
+            if (cmd->pool->dev->pdev->dev->cls >= 0xc1) {
+               P_MTHD(push, NVC1B5, SRC_ORIGIN_X);
+               P_NVC1B5_SRC_ORIGIN_X(push, copy->src.offset.x * copy->bpp);
+               P_NVC1B5_SRC_ORIGIN_Y(push, copy->src.offset.y);
+            } else {
+               P_MTHD(push, NV90B5, SET_SRC_ORIGIN);
+               P_NV90B5_SET_SRC_ORIGIN(push, {
+                  .x = copy->src.offset.x * copy->bpp,
+                  .y = copy->src.offset.y
+               });
+            }
+
+            src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
          } else {
-            P_MTHD(push, NV90B5, SET_DST_ORIGIN);
-            P_NV90B5_SET_DST_ORIGIN(push, {
-               .x = copy->dst.offset.x * copy->bpp,
-               .y = copy->dst.offset.y
+            src_addr += copy->src.layer_stride;
+            src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
+         }
+
+         if (copy->dst.tile.is_tiled) {
+            assert(copy->dst.tile.is_fermi);
+            P_MTHD(push, NV90B5, SET_DST_BLOCK_SIZE);
+            P_NV90B5_SET_DST_BLOCK_SIZE(push, {
+               .width = copy->dst.tile.x,
+               .height = copy->dst.tile.y,
+               .depth = copy->dst.tile.z,
+               .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
             });
+            P_NV90B5_SET_DST_WIDTH(push, copy->dst.extent.width * copy->bpp);
+            P_NV90B5_SET_DST_HEIGHT(push, copy->dst.extent.height);
+            P_NV90B5_SET_DST_DEPTH(push, copy->dst.extent.depth);
+            P_NV90B5_SET_DST_LAYER(push, z + copy->dst.offset.z);
+
+            if (cmd->pool->dev->pdev->dev->cls >= 0xc1) {
+               P_MTHD(push, NVC1B5, DST_ORIGIN_X);
+               P_NVC1B5_DST_ORIGIN_X(push, copy->dst.offset.x * copy->bpp);
+               P_NVC1B5_DST_ORIGIN_Y(push, copy->dst.offset.y);
+            } else {
+               P_MTHD(push, NV90B5, SET_DST_ORIGIN);
+               P_NV90B5_SET_DST_ORIGIN(push, {
+                  .x = copy->dst.offset.x * copy->bpp,
+                  .y = copy->dst.offset.y
+               });
+            }
+
+            dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
+         } else {
+            dst_addr += copy->dst.layer_stride;
+            dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
          }
 
-         dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
-      } else {
-         dst_addr += copy->dst.layer_stride;
-         dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
+         P_IMMD(push, NV90B5, LAUNCH_DMA, {
+            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
+            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
+            .flush_enable = FLUSH_ENABLE_TRUE,
+            .src_memory_layout = src_layout,
+            .dst_memory_layout = dst_layout
+         });
       }
-
-      P_IMMD(push, NV90B5, LAUNCH_DMA, {
-         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
-         .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
-         .flush_enable = FLUSH_ENABLE_TRUE,
-         .src_memory_layout = src_layout,
-         .dst_memory_layout = dst_layout
-      });
    }
 }
 
@@ -248,8 +249,8 @@ nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
          .dst = nouveau_copy_rect_image(dst, region->imageOffset, &region->imageSubresource),
          .bpp = buffer_layout.element_size_B,
          .extent = vk_image_sanitize_extent(&dst->vk, region->imageExtent),
+         .layer_count = region->imageSubresource.layerCount,
       };
-      copy.extent.depth *= region->imageSubresource.layerCount;
 
       nouveau_copy_rect(cmd, &copy);
 
@@ -291,8 +292,8 @@ nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
          .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset, buffer_layout),
          .bpp = buffer_layout.element_size_B,
          .extent = vk_image_sanitize_extent(&src->vk, region->imageExtent),
+         .layer_count = region->imageSubresource.layerCount,
       };
-      copy.extent.depth *= region->imageSubresource.layerCount;
 
       nouveau_copy_rect(cmd, &copy);
 
@@ -337,8 +338,8 @@ nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,
          .dst = nouveau_copy_rect_image(dst, region->dstOffset, &region->dstSubresource),
          .bpp = bpp,
          .extent = vk_image_sanitize_extent(&src->vk, region->extent),
+         .layer_count = region->srcSubresource.layerCount,
       };
-      copy.extent.depth *= region->srcSubresource.layerCount;
 
       nouveau_copy_rect(cmd, &copy);
    }