zink: create separate linear tiling image for scanout
author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Fri, 9 Apr 2021 14:23:48 +0000 (10:23 -0400)
committer Marge Bot <eric+marge@anholt.net>
Mon, 12 Apr 2021 15:37:46 +0000 (15:37 +0000)
rendering onto a linear-tiled image is unbelievably slow if any sort of
blending is enabled, so instead always render to optimal tiling and then
copy to linear for scanout

this doubles performance for now and can be deleted in its entirety along
with the rest of the related hacks once real wsi support is implemented

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10180>

src/gallium/drivers/zink/zink_batch.c
src/gallium/drivers/zink/zink_context.c
src/gallium/drivers/zink/zink_context.h
src/gallium/drivers/zink/zink_resource.c
src/gallium/drivers/zink/zink_resource.h

index e0a8dec..8ae1159 100644 (file)
@@ -338,7 +338,7 @@ submit_queue(void *data, int thread_index)
    };
 
    if (bs->flush_res) {
-      mem_signal.memory = bs->flush_res->obj->mem;
+      mem_signal.memory = bs->flush_res->scanout_obj ? bs->flush_res->scanout_obj->mem : bs->flush_res->obj->mem;
       si.pNext = &mem_signal;
    }
 
index 4260d02..36e93f4 100644 (file)
@@ -1687,6 +1687,125 @@ equals_gfx_program(const void *a, const void *b)
    return memcmp(a, b, sizeof(struct zink_shader *) * (ZINK_SHADER_COUNT)) == 0;
 }
 
+/* TODO: remove for wsi */
+/**
+ * Record a copy of mip level 0 of res->obj->image into
+ * res->scanout_obj->image on ctx->batch.state->cmdbuf.
+ *
+ * The source image is moved to TRANSFER_SRC_OPTIMAL through
+ * zink_resource_image_barrier(); the scanout image is moved to
+ * TRANSFER_DST_OPTIMAL for the copy and left in PRESENT_SRC_KHR afterwards.
+ *
+ * Does nothing when res has no scanout_obj.
+ */
+static void
+copy_scanout(struct zink_context *ctx, struct zink_resource *res)
+{
+   /* scanout_obj is only allocated for some resources (other users of the
+    * field check it for NULL), so there may be nothing to copy into
+    */
+   if (!res->scanout_obj)
+      return;
+
+   VkImageCopy region = {};
+   /* full extent of level 0; depth is overwritten just below */
+   struct pipe_box box = {0, 0, 0,
+                          u_minify(res->base.b.width0, 0),
+                          u_minify(res->base.b.height0, 0), res->base.b.array_size};
+   box.depth = util_num_layers(&res->base.b, 0);
+   struct pipe_box *src_box = &box;
+   unsigned dstz = 0;
+
+   /* source subresource: box z/depth map to array layers or to the z axis
+    * depending on the texture target
+    */
+   region.srcSubresource.aspectMask = res->aspect;
+   region.srcSubresource.mipLevel = 0;
+   switch (res->base.b.target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_1D_ARRAY:
+      /* these use layer */
+      region.srcSubresource.baseArrayLayer = src_box->z;
+      region.srcSubresource.layerCount = src_box->depth;
+      region.srcOffset.z = 0;
+      region.extent.depth = 1;
+      break;
+   case PIPE_TEXTURE_3D:
+      /* this uses depth */
+      region.srcSubresource.baseArrayLayer = 0;
+      region.srcSubresource.layerCount = 1;
+      region.srcOffset.z = src_box->z;
+      region.extent.depth = src_box->depth;
+      break;
+   default:
+      /* these must only copy one layer */
+      region.srcSubresource.baseArrayLayer = 0;
+      region.srcSubresource.layerCount = 1;
+      region.srcOffset.z = 0;
+      region.extent.depth = 1;
+   }
+
+   region.srcOffset.x = src_box->x;
+   region.srcOffset.y = src_box->y;
+
+   /* destination subresource: same mapping as the source, starting at dstz */
+   region.dstSubresource.aspectMask = res->aspect;
+   region.dstSubresource.mipLevel = 0;
+   switch (res->base.b.target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_1D_ARRAY:
+      /* these use layer */
+      region.dstSubresource.baseArrayLayer = dstz;
+      region.dstSubresource.layerCount = src_box->depth;
+      region.dstOffset.z = 0;
+      break;
+   case PIPE_TEXTURE_3D:
+      /* this uses depth */
+      region.dstSubresource.baseArrayLayer = 0;
+      region.dstSubresource.layerCount = 1;
+      region.dstOffset.z = dstz;
+      break;
+   default:
+      /* these must only copy one layer */
+      region.dstSubresource.baseArrayLayer = 0;
+      region.dstSubresource.layerCount = 1;
+      region.dstOffset.z = 0;
+   }
+
+   region.dstOffset.x = 0;
+   region.dstOffset.y = 0;
+   region.extent.width = src_box->width;
+   region.extent.height = src_box->height;
+   /* make the rendered image readable by the transfer; this also updates
+    * res->layout, which is passed to vkCmdCopyImage() below
+    */
+   zink_resource_image_barrier(ctx, NULL, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+
+   VkImageSubresourceRange isr = {
+      res->aspect,
+      0, VK_REMAINING_MIP_LEVELS,
+      0, VK_REMAINING_ARRAY_LAYERS
+   };
+   VkImageMemoryBarrier imb = {
+      VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+      NULL,
+      0,
+      VK_ACCESS_TRANSFER_WRITE_BIT,
+      /* oldLayout: a freshly created image starts out UNDEFINED or
+       * PREINITIALIZED, so PRESENT_SRC_KHR would not match the real layout
+       * on the first copy. UNDEFINED is always accepted as oldLayout, and
+       * discarding the previous contents is fine because the copy below
+       * rewrites all of level 0.
+       */
+      VK_IMAGE_LAYOUT_UNDEFINED,
+      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+      VK_QUEUE_FAMILY_IGNORED,
+      VK_QUEUE_FAMILY_IGNORED,
+      res->scanout_obj->image,
+      isr
+   };
+   /* scanout image -> TRANSFER_DST_OPTIMAL before the copy */
+   vkCmdPipelineBarrier(
+      ctx->batch.state->cmdbuf,
+      VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+      VK_PIPELINE_STAGE_TRANSFER_BIT,
+      0,
+      0, NULL,
+      0, NULL,
+      1, &imb
+   );
+
+   vkCmdCopyImage(ctx->batch.state->cmdbuf, res->obj->image, res->layout,
+                  res->scanout_obj->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                  1, &region);
+   /* reuse the barrier struct: scanout image -> PRESENT_SRC_KHR after the
+    * transfer write has completed
+    */
+   imb.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+   imb.dstAccessMask = 0;
+   imb.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+   imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+   vkCmdPipelineBarrier(
+      ctx->batch.state->cmdbuf,
+      VK_PIPELINE_STAGE_TRANSFER_BIT,
+      VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+      0,
+      0, NULL,
+      0, NULL,
+      1, &imb
+   );
+}
+
 static void
 zink_flush(struct pipe_context *pctx,
            struct pipe_fence_handle **pfence,
@@ -1704,14 +1823,14 @@ zink_flush(struct pipe_context *pctx,
       zink_begin_render_pass(ctx, batch);
    }
 
-   if (flags & PIPE_FLUSH_END_OF_FRAME && ctx->fb_state.nr_cbufs) {
+   if (flags & PIPE_FLUSH_END_OF_FRAME) {
       zink_end_render_pass(ctx, batch);
-      for (int i = 0; i < ctx->fb_state.nr_cbufs; i++)
-         zink_resource_image_barrier(ctx, batch,
-                                     ctx->fb_state.cbufs[i] ? zink_resource(ctx->fb_state.cbufs[i]->texture) : NULL,
-                                     VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, 0);
-      if (zink_screen(pctx->screen)->needs_mesa_flush_wsi && ctx->fb_state.cbufs[0])
-         batch->state->flush_res = zink_resource(ctx->fb_state.cbufs[0]->texture);
+      if (ctx->flush_res) {
+         copy_scanout(ctx, ctx->flush_res);
+         if (zink_screen(pctx->screen)->needs_mesa_flush_wsi)
+            batch->state->flush_res = ctx->flush_res;
+         ctx->flush_res = NULL;
+      }
    }
 
    if (!batch->has_work) {
@@ -2047,9 +2166,15 @@ zink_memory_barrier(struct pipe_context *pctx, unsigned flags)
 }
 
 static void
-zink_flush_resource(struct pipe_context *pipe,
-                    struct pipe_resource *resource)
+zink_flush_resource(struct pipe_context *pctx,
+                    struct pipe_resource *pres)
 {
+   struct zink_context *ctx = zink_context(pctx);
+   /* TODO: this is not futureproof and should be updated once proper
+    * WSI support is added
+    */
+   if (pres->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+      ctx->flush_res = zink_resource(pres);
 }
 
 void
index e4de644..b79467c 100644 (file)
@@ -181,6 +181,7 @@ struct zink_context {
 
    struct primconvert_context *primconvert;
 
+   struct zink_resource *flush_res;
    struct zink_framebuffer *framebuffer;
    struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1];
    uint16_t clears_enabled;
index f1c5f6f..fc3a29a 100644 (file)
@@ -162,6 +162,7 @@ zink_resource_destroy(struct pipe_screen *pscreen,
       util_range_destroy(&res->valid_buffer_range);
 
    zink_resource_object_reference(screen, &res->obj, NULL);
+   zink_resource_object_reference(screen, &res->scanout_obj, NULL);
    threaded_resource_deinit(pres);
    FREE(res);
 }
@@ -248,7 +249,7 @@ get_image_usage(struct zink_screen *screen, VkImageTiling tiling, const struct p
       usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
    if (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)
       usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-   if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)
+   if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT && !((bind & (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT)) == (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT)))
       usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
 
    if ((templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample) &&
@@ -359,6 +360,8 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
 
    VkMemoryRequirements reqs = {};
    VkMemoryPropertyFlags flags;
+   bool scanout = templ->bind & PIPE_BIND_SCANOUT;
+   bool shared = templ->bind & PIPE_BIND_SHARED;
 
    pipe_reference_init(&obj->reference, 1);
    util_dynarray_init(&obj->desc_set_refs.refs, NULL);
@@ -382,8 +385,13 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
          emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
          ici.pNext = &emici;
 
-         /* TODO: deal with DRM modifiers here */
-         ici.tiling = VK_IMAGE_TILING_LINEAR;
+         if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) {
+            // TODO: remove for wsi
+            ici.pNext = NULL;
+            scanout = false;
+            shared = false;
+         }
+
       }
 
       if (optimal_tiling)
@@ -417,7 +425,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
          .scanout = true,
       };
 
-      if (screen->needs_mesa_wsi && (templ->bind & PIPE_BIND_SCANOUT)) {
+      if (screen->needs_mesa_wsi && scanout) {
          image_wsi_info.pNext = ici.pNext;
          ici.pNext = &image_wsi_info;
       }
@@ -460,7 +468,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
    }
 
    VkExportMemoryAllocateInfo emai = {};
-   if (templ->bind & PIPE_BIND_SHARED) {
+   if (templ->bind & PIPE_BIND_SHARED && shared) {
       emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
       emai.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
 
@@ -487,7 +495,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
       NULL,
    };
 
-   if (screen->needs_mesa_wsi && (templ->bind & PIPE_BIND_SCANOUT)) {
+   if (screen->needs_mesa_wsi && scanout) {
       memory_wsi_info.implicit_sync = true;
 
       memory_wsi_info.pNext = mai.pNext;
@@ -574,6 +582,13 @@ resource_create(struct pipe_screen *pscreen,
       res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
       res->optimal_tiling = optimal_tiling;
       res->aspect = aspect_from_format(templ->format);
+      if (res->base.b.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED) && optimal_tiling) {
+         // TODO: remove for wsi
+         struct pipe_resource templ2 = res->base.b;
+         templ2.bind = (res->base.b.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) | PIPE_BIND_LINEAR;
+         res->scanout_obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling);
+         assert(!optimal_tiling);
+      }
    }
 
    if (screen->winsys && (templ->bind & PIPE_BIND_DISPLAY_TARGET)) {
@@ -606,6 +621,8 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
 {
    struct zink_resource *res = zink_resource(tex);
    struct zink_screen *screen = zink_screen(pscreen);
+   //TODO: remove for wsi
+   struct zink_resource_object *obj = res->scanout_obj ? res->scanout_obj : res->obj;
 
    if (res->base.b.target != PIPE_BUFFER) {
       VkImageSubresource sub_res = {};
@@ -613,7 +630,7 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
 
       sub_res.aspectMask = res->aspect;
 
-      vkGetImageSubresourceLayout(screen->dev, res->obj->image, &sub_res, &sub_res_layout);
+      vkGetImageSubresourceLayout(screen->dev, obj->image, &sub_res, &sub_res_layout);
 
       whandle->stride = sub_res_layout.rowPitch;
    }
@@ -623,7 +640,8 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
       VkMemoryGetFdInfoKHR fd_info = {};
       int fd;
       fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
-      fd_info.memory = res->obj->mem;
+      //TODO: remove for wsi
+      fd_info.memory = obj->mem;
       fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
       VkResult result = (*screen->vk_GetMemoryFdKHR)(screen->dev, &fd_info, &fd);
       if (result != VK_SUCCESS)
index 64b9f84..1bf9599 100644 (file)
@@ -88,6 +88,7 @@ struct zink_resource {
    VkAccessFlags access;
 
    struct zink_resource_object *obj;
+   struct zink_resource_object *scanout_obj; //TODO: remove for wsi
    union {
       struct util_range valid_buffer_range;
       struct {