zink: rework handling of unordered->ordered write buffer barriers
authorMike Blumenkrantz <michael.blumenkrantz@gmail.com>
Mon, 20 Mar 2023 14:49:55 +0000 (10:49 -0400)
committerMarge Bot <emma+marge@anholt.net>
Tue, 21 Mar 2023 01:39:39 +0000 (01:39 +0000)
this improves handling of barriers that originate from a write in the
unordered cmdbuf: resources now track their unordered-cmdbuf access
separately, and that tracking is used to emit a single split memory
barrier at the end of the unordered cmdbuf covering all buffers written
on that cmdbuf

the next step will be to also merge read access down onto the end-of-cmdbuf
barrier so that all stream-upload-type functionality becomes a single barrier
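
for illustration only (none of this is in the patch; the struct and helper
names below are made up), a minimal standalone sketch of what the "single
split memory barrier" amounts to in plain Vulkan: every unordered buffer
write ORs its access/stage bits into a per-cmdbuf total, and only the
source half of the barrier (dstAccessMask = 0) is recorded when the
unordered cmdbuf is closed; the ordered cmdbuf supplies the destination
half later when it actually uses the buffers

    #include <stddef.h>
    #include <vulkan/vulkan.h>

    /* hypothetical per-cmdbuf accumulator, analogous to the new
     * unordered_write_access/unordered_write_stages batch-state fields */
    struct unordered_write_state {
       VkAccessFlags write_access;        /* all write access bits seen */
       VkPipelineStageFlags write_stages; /* all producing stages seen */
    };

    static void
    track_unordered_write(struct unordered_write_state *s,
                          VkAccessFlags access, VkPipelineStageFlags stages)
    {
       /* called once per buffer written on the unordered cmdbuf */
       s->write_access |= access;
       s->write_stages |= stages;
    }

    static void
    flush_unordered_writes(VkCommandBuffer unordered_cmdbuf,
                           const struct unordered_write_state *s)
    {
       if (!s->write_access)
          return;
       VkMemoryBarrier mb = {
          .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
          .srcAccessMask = s->write_access,
          /* dstAccessMask stays 0: the matching second half is emitted
           * later by the ordered cmdbuf's own barriers */
          .dstAccessMask = 0,
       };
       /* BOTTOM_OF_PIPE as dst stage just keeps the sketch valid
        * standalone; the patch passes its own stage masks */
       vkCmdPipelineBarrier(unordered_cmdbuf,
                            s->write_stages,
                            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                            0, 1, &mb, 0, NULL, 0, NULL);
    }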

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22027>

src/gallium/drivers/zink/zink_batch.c
src/gallium/drivers/zink/zink_context.c
src/gallium/drivers/zink/zink_types.h

diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c
index 193d337..db4a032 100644
@@ -31,8 +31,10 @@ reset_obj(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_r
       obj->unordered_read = true;
       obj->unordered_write = true;
       obj->access = 0;
+      obj->unordered_access = 0;
       obj->last_write = 0;
       obj->access_stage = 0;
+      obj->unordered_access_stage = 0;
       obj->copies_need_reset = true;
       /* also prune dead view objects */
       simple_mtx_lock(&obj->view_lock);
@@ -151,6 +153,9 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
    }
    bs->swapchain = NULL;
 
+   bs->unordered_write_access = 0;
+   bs->unordered_write_stages = 0;
+
    /* only reset submitted here so that tc fence desync can pick up the 'completed' flag
     * before the state is reused
     */
@@ -573,6 +578,16 @@ submit_queue(void *data, void *gdata, int thread_index)
       goto end;
    }
    if (bs->has_barriers) {
+      if (bs->unordered_write_access) {
+         VkMemoryBarrier mb;
+         mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+         mb.pNext = NULL;
+         mb.srcAccessMask = bs->unordered_write_access;
+         mb.dstAccessMask = 0;
+         VKSCR(CmdPipelineBarrier)(bs->barrier_cmdbuf,
+                                   bs->unordered_write_stages, 0,
+                                   0, 1, &mb, 0, NULL, 0, NULL);
+      }
       result = VKSCR(EndCommandBuffer)(bs->barrier_cmdbuf);
       if (result != VK_SUCCESS) {
          mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result));
diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c
index 12e86fd..ad189c6 100644
@@ -3959,16 +3959,25 @@ zink_resource_buffer_transfer_dst_barrier(struct zink_context *ctx, struct zink_
       zink_resource_copies_reset(res);
    bool unordered = true;
    struct pipe_box box = {offset, 0, 0, size, 0, 0};
+   bool can_unordered_write = unordered_res_exec(ctx, res, true);
    /* must barrier if something read the valid buffer range */
-   bool valid_read = res->obj->access && util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !unordered_res_exec(ctx, res, true);
-   if (zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics ||
-       zink_check_unordered_transfer_access(res, 0, &box) || valid_read) {
+   bool valid_read = (res->obj->access || res->obj->unordered_access) &&
+                     util_ranges_intersect(&res->valid_buffer_range, offset, offset + size) && !can_unordered_write;
+   if (valid_read || zink_screen(ctx->base.screen)->driver_workarounds.broken_cache_semantics || zink_check_unordered_transfer_access(res, 0, &box)) {
       zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
       unordered = res->obj->unordered_write;
    } else {
-      res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+      res->obj->unordered_access = VK_ACCESS_TRANSFER_WRITE_BIT;
       res->obj->last_write = VK_ACCESS_TRANSFER_WRITE_BIT;
-      res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+      res->obj->unordered_access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+      ctx->batch.state->unordered_write_access |= VK_ACCESS_TRANSFER_WRITE_BIT;
+      ctx->batch.state->unordered_write_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+      if (!zink_resource_usage_matches(res, ctx->batch.state)) {
+         res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
+         res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+         res->obj->ordered_access_is_copied = true;
+      }
    }
    zink_resource_copy_box_add(res, 0, &box);
    /* this return value implies that the caller could do an unordered op on this resource */
@@ -4015,16 +4024,12 @@ pipeline_access_stage(VkAccessFlags flags)
 }
 
 ALWAYS_INLINE static bool
-zink_resource_buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline)
+buffer_needs_barrier(struct zink_resource *res, VkAccessFlags flags, VkPipelineStageFlags pipeline, bool unordered)
 {
-   if (!res->obj->access || !res->obj->access_stage)
-      return true;
-   if (!pipeline)
-      pipeline = pipeline_access_stage(flags);
-   return zink_resource_access_is_write(res->obj->access) ||
+   return zink_resource_access_is_write(unordered ? res->obj->unordered_access : res->obj->access) ||
           zink_resource_access_is_write(flags) ||
-          (res->obj->access_stage & pipeline) != pipeline ||
-          (res->obj->access & flags) != flags;
+          ((unordered ? res->obj->unordered_access_stage : res->obj->access_stage) & pipeline) != pipeline ||
+          ((unordered ? res->obj->unordered_access : res->obj->access) & flags) != flags;
 }
 
 void
@@ -4032,20 +4037,59 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
 {
    if (!pipeline)
       pipeline = pipeline_access_stage(flags);
-   if (!zink_resource_buffer_needs_barrier(res, flags, pipeline))
-      return;
 
    bool is_write = zink_resource_access_is_write(flags);
+   bool unordered = unordered_res_exec(ctx, res, is_write);
+   if (!buffer_needs_barrier(res, flags, pipeline, unordered))
+      return;
    enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
-   if (res->obj->access && !zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw)) {
+   bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+   bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+   bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
+   if (completed) {
+      /* reset access on complete */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+      res->obj->last_write = 0;
+   } else if (unordered && unordered_usage_matches && res->obj->ordered_access_is_copied) {
+      /* always reset propagated access to avoid weirdness */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+   } else if (!unordered && !unordered_usage_matches) {
+      /* reset unordered access on first ordered barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+   }
+   if (!usage_matches) {
+      /* reset unordered on first new cmdbuf barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+      res->obj->ordered_access_is_copied = false;
+   }
+   /* unordered barriers can be skipped if either:
+    * - there is no current-batch unordered access
+    * - the unordered access is not write access
+    */
+   bool can_skip_unordered = !unordered ? false : (!unordered_usage_matches || !zink_resource_access_is_write(res->obj->unordered_access));
+   /* ordered barriers can be skipped if both:
+    * - there is no current access
+    * - there is no current-batch unordered access
+    */
+   bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+
+   if (!can_skip_unordered && !can_skip_ordered) {
       VkMemoryBarrier bmb;
       bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
       bmb.pNext = NULL;
-      bmb.srcAccessMask = res->obj->access;
-      bmb.dstAccessMask = flags;
-      assert(res->obj->access_stage);
-      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
+      VkPipelineStageFlagBits stages = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
+      if (unordered) {
+         stages = usage_matches ? res->obj->unordered_access_stage : stages;
+         bmb.srcAccessMask = usage_matches ? res->obj->unordered_access : res->obj->access;
+      } else {
+         bmb.srcAccessMask = res->obj->access;
+      }
 
+      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
       bool marker = false;
       if (unlikely(zink_tracing)) {
          char buf[4096];
@@ -4061,7 +4105,7 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
       }
       VKCTX(CmdPipelineBarrier)(
          cmdbuf,
-         res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access),
+         stages,
          pipeline,
          0,
          1, &bmb,
@@ -4075,9 +4119,20 @@ zink_resource_buffer_barrier(struct zink_context *ctx, struct zink_resource *res
 
    if (is_write)
       res->obj->last_write = flags;
-
-   res->obj->access = flags;
-   res->obj->access_stage = pipeline;
+   if (unordered) {
+      /* these should get automatically emitted during submission */
+      res->obj->unordered_access = flags;
+      res->obj->unordered_access_stage = pipeline;
+      if (is_write) {
+         ctx->batch.state->unordered_write_access |= flags;
+         ctx->batch.state->unordered_write_stages |= pipeline;
+      }
+   }
+   if (!unordered || !usage_matches || res->obj->ordered_access_is_copied) {
+      res->obj->access = flags;
+      res->obj->access_stage = pipeline;
+      res->obj->ordered_access_is_copied = unordered;
+   }
    if (pipeline != VK_PIPELINE_STAGE_TRANSFER_BIT && is_write)
       zink_resource_copies_reset(res);
 }
@@ -4087,21 +4142,60 @@ zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *re
 {
    if (!pipeline)
       pipeline = pipeline_access_stage(flags);
-   if (!zink_resource_buffer_needs_barrier(res, flags, pipeline))
-      return;
 
    bool is_write = zink_resource_access_is_write(flags);
+   bool unordered = unordered_res_exec(ctx, res, is_write);
+   if (!buffer_needs_barrier(res, flags, pipeline, unordered))
+      return;
    enum zink_resource_access rw = is_write ? ZINK_RESOURCE_ACCESS_RW : ZINK_RESOURCE_ACCESS_WRITE;
-   if (res->obj->access && !zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw)) {
+   bool completed = zink_resource_usage_check_completion_fast(zink_screen(ctx->base.screen), res, rw);
+   bool usage_matches = !completed && zink_resource_usage_matches(res, ctx->batch.state);
+   bool unordered_usage_matches = res->obj->unordered_access && usage_matches;
+   if (completed) {
+      /* reset access on complete */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+      res->obj->last_write = 0;
+   } else if (unordered && unordered_usage_matches && res->obj->ordered_access_is_copied) {
+      /* always reset propagated access to avoid weirdness */
+      res->obj->access = 0;
+      res->obj->access_stage = 0;
+   } else if (!unordered && !unordered_usage_matches) {
+      /* reset unordered access on first ordered barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+   }
+   if (!usage_matches) {
+      /* reset unordered on first new cmdbuf barrier */
+      res->obj->unordered_access = 0;
+      res->obj->unordered_access_stage = 0;
+      res->obj->ordered_access_is_copied = false;
+   }
+   /* unordered barriers can be skipped if either:
+    * - there is no current-batch unordered access
+    * - the unordered access is not write access
+    */
+   bool can_skip_unordered = !unordered ? false : (!unordered_usage_matches || !zink_resource_access_is_write(res->obj->unordered_access));
+   /* ordered barriers can be skipped if both:
+    * - there is no current access
+    * - there is no current-batch unordered access
+    */
+   bool can_skip_ordered = unordered ? false : (!res->obj->access && !unordered_usage_matches);
+
+   if (!can_skip_unordered && !can_skip_ordered) {
       VkMemoryBarrier2 bmb;
       bmb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
       bmb.pNext = NULL;
-      bmb.srcStageMask = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
-      bmb.srcAccessMask = res->obj->access;
+      VkPipelineStageFlagBits stages = res->obj->access_stage ? res->obj->access_stage : pipeline_access_stage(res->obj->access);
+      if (unordered) {
+         bmb.srcStageMask = usage_matches ? res->obj->unordered_access_stage : stages;
+         bmb.srcAccessMask = usage_matches ? res->obj->unordered_access : res->obj->access;
+      } else {
+         bmb.srcStageMask = stages;
+         bmb.srcAccessMask = res->obj->access;
+      }
       bmb.dstStageMask = pipeline;
       bmb.dstAccessMask = flags;
-      assert(res->obj->access_stage);
-      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
       VkDependencyInfo dep = {
          VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
          NULL,
@@ -4113,6 +4207,7 @@ zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *re
          0,
          NULL
       };
+      VkCommandBuffer cmdbuf = is_write ? zink_get_cmdbuf(ctx, NULL, res) : zink_get_cmdbuf(ctx, res, NULL);
       bool marker = false;
       if (unlikely(zink_tracing)) {
          char buf[4096];
@@ -4134,9 +4229,20 @@ zink_resource_buffer_barrier2(struct zink_context *ctx, struct zink_resource *re
 
    if (is_write)
       res->obj->last_write = flags;
-
-   res->obj->access = flags;
-   res->obj->access_stage = pipeline;
+   if (unordered) {
+      /* these should get automatically emitted during submission */
+      res->obj->unordered_access = flags;
+      res->obj->unordered_access_stage = pipeline;
+      if (is_write) {
+         ctx->batch.state->unordered_write_access |= flags;
+         ctx->batch.state->unordered_write_stages |= pipeline;
+      }
+   }
+   if (!unordered || !usage_matches || res->obj->ordered_access_is_copied) {
+      res->obj->access = flags;
+      res->obj->access_stage = pipeline;
+      res->obj->ordered_access_is_copied = unordered;
+   }
    if (pipeline != VK_PIPELINE_STAGE_TRANSFER_BIT && is_write)
       zink_resource_copies_reset(res);
 }
diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h
index a477fd8..56849ef 100644
@@ -559,6 +559,9 @@ struct zink_batch_state {
    struct util_dynarray acquires;
    struct util_dynarray acquire_flags;
 
+   VkAccessFlagBits unordered_write_access;
+   VkPipelineStageFlagBits unordered_write_stages;
+
    struct util_queue_fence flush_completed;
 
    struct set programs;
@@ -1117,8 +1120,14 @@ struct zink_resource_object {
 
    VkPipelineStageFlagBits access_stage;
    VkAccessFlagBits access;
+   VkPipelineStageFlagBits unordered_access_stage;
+   VkAccessFlagBits unordered_access;
    VkAccessFlagBits last_write;
 
+   /* 'access' is propagated from unordered_access to handle ops occurring
+    * in the ordered cmdbuf which can promote barriers to unordered
+    */
+   bool ordered_access_is_copied;
    bool unordered_read;
    bool unordered_write;
    bool copies_valid;