util/tc: implement renderpass tracking
author    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
          Fri, 14 Oct 2022 15:53:03 +0000 (11:53 -0400)
committer Marge Bot <emma+marge@anholt.net>
          Sat, 29 Oct 2022 20:19:51 +0000 (20:19 +0000)
this allows tc to track metadata for framebuffer attachments so that
drivers can optimize their renderpasses

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19077>

src/gallium/auxiliary/util/u_threaded_context.c
src/gallium/auxiliary/util/u_threaded_context.h
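
As a rough illustration of how a driver would opt in to this tracking, here is a minimal sketch built from the threaded_context_options fields and tc_renderpass_info layout added in the header diff below. It is not part of this patch: my_dsa_state/my_fs_state and their fields are hypothetical placeholders for a driver's own CSO types, and the option struct is assumed to be handed to threaded_context_create as usual.

#include "util/u_threaded_context.h"

/* hypothetical driver CSO types -- placeholders, not part of this patch */
struct my_dsa_state {
   bool writes_zs;        /* depth or stencil writes enabled */
   bool reads_zs;         /* depth or stencil tests enabled */
};

struct my_fs_state {
   bool writes_depth;     /* shader exports gl_FragDepth */
   uint8_t fbfetch_mask;  /* color buffers read via fbfetch */
};

/* per the header below, parse callbacks must ONLY append bits using |= */
static void
my_dsa_parse(void *state, struct tc_renderpass_info *info)
{
   struct my_dsa_state *dsa = state;
   info->zsbuf_write_dsa |= dsa->writes_zs;
   info->zsbuf_read_dsa |= dsa->reads_zs;
}

static void
my_fs_parse(void *state, struct tc_renderpass_info *info)
{
   struct my_fs_state *fs = state;
   info->zsbuf_write_fs |= fs->writes_depth;
   info->cbuf_fbfetch |= fs->fbfetch_mask;
}

/* passed to threaded_context_create() when the driver wants tracking */
static struct threaded_context_options my_tc_options = {
   .parse_renderpass_info = true,
   .dsa_parse = my_dsa_parse,
   .fs_parse = my_fs_parse,
};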

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index bac20b1..6b3929d 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -116,6 +116,103 @@ tc_clear_driver_thread(struct threaded_context *tc)
 #endif
 }
 
+/* ensure the batch's array of renderpass data is large enough for the current index */
+static void
+tc_batch_renderpass_infos_resize(struct tc_batch *batch)
+{
+   unsigned size = batch->renderpass_infos.capacity;
+   unsigned cur_num = batch->renderpass_info_idx;
+
+   if (size / sizeof(struct tc_renderpass_info) > cur_num)
+      return;
+
+   if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_renderpass_info, cur_num + 10))
+      mesa_loge("tc: memory alloc fail!");
+
+   if (size != batch->renderpass_infos.capacity) {
+      /* zero new allocation region */
+      uint8_t *data = batch->renderpass_infos.data;
+      memset(data + size, 0, batch->renderpass_infos.capacity - size);
+      unsigned start = size / sizeof(struct tc_renderpass_info);
+      unsigned count = (batch->renderpass_infos.capacity - size) /
+                       sizeof(struct tc_renderpass_info);
+      struct tc_renderpass_info *infos = batch->renderpass_infos.data;
+      for (unsigned i = 0; i < count; i++)
+         util_queue_fence_init(&infos[start + i].ready);
+   }
+}
+
+/* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
+static void
+tc_signal_renderpass_info_ready(struct threaded_context *tc)
+{
+   if (tc->renderpass_info_recording &&
+       !util_queue_fence_is_signalled(&tc->renderpass_info_recording->ready))
+      util_queue_fence_signal(&tc->renderpass_info_recording->ready);
+}
+
+/* increment the current renderpass info struct for recording
+ * 'full_copy' is used for preserving data across non-blocking tc batch flushes
+ */
+static void
+tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy)
+{
+   struct tc_batch *batch = &tc->batch_slots[tc->next];
+   struct tc_renderpass_info *tc_info = batch->renderpass_infos.data;
+
+   /* signal existing info since it will not be used anymore */
+   tc_signal_renderpass_info_ready(tc);
+   /* increment rp info and initialize it */
+   batch->renderpass_info_idx++;
+   tc_batch_renderpass_infos_resize(batch);
+   tc_info = batch->renderpass_infos.data;
+
+   if (full_copy) {
+      /* copy the previous data in its entirety: this is still the same renderpass */
+      if (tc->renderpass_info_recording)
+         tc_info[batch->renderpass_info_idx].data = tc->renderpass_info_recording->data;
+      else
+         tc_info[batch->renderpass_info_idx].data = 0;
+   } else {
+      /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
+      tc_info[batch->renderpass_info_idx].data = 0;
+      if (tc->renderpass_info_recording)
+         tc_info[batch->renderpass_info_idx].data16[2] = tc->renderpass_info_recording->data16[2];
+   }
+
+   util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
+   assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx]);
+   /* this is now the current recording renderpass info */
+   tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx];
+}
+
+static ALWAYS_INLINE struct tc_renderpass_info *
+tc_get_renderpass_info(struct threaded_context *tc)
+{
+   return tc->renderpass_info_recording;
+}
+
+/* update metadata at draw time */
+static void
+tc_parse_draw(struct threaded_context *tc)
+{
+   struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+
+   if (info) {
+      /* all buffers that aren't cleared are considered loaded */
+      info->cbuf_load |= ~info->cbuf_clear;
+      if (!info->zsbuf_clear)
+         info->zsbuf_load = true;
+      /* previous invalidates are no longer relevant */
+      info->cbuf_invalidate = 0;
+      info->zsbuf_invalidate = false;
+      info->has_draw = true;
+   }
+
+   tc->in_renderpass = true;
+   tc->seen_fb_state = true;
+}
+
 static void *
 to_call_check(void *ptr, unsigned num_slots)
 {
@@ -194,18 +291,13 @@ tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
    offsetof(struct pipe_draw_info, min_index)
 
-static void
-tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
+ALWAYS_INLINE static void
+batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
 {
-   struct tc_batch *batch = job;
-   struct pipe_context *pipe = batch->tc->pipe;
-   uint64_t *last = &batch->slots[batch->num_total_slots];
-
-   tc_batch_check(batch);
-   tc_set_driver_thread(batch->tc);
-
-   assert(!batch->token);
-
+   /* if the framebuffer state is persisting from a previous batch,
+    * begin incrementing renderpass info on the first set_framebuffer_state call
+    */
+   bool first = !batch->first_set_fb;
    for (uint64_t *iter = batch->slots; iter != last;) {
       struct tc_call_base *call = (struct tc_call_base *)iter;
 
@@ -218,7 +310,50 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
       TC_TRACE_SCOPE(call->call_id);
 
       iter += execute_func[call->call_id](pipe, call, last);
+
+      if (parsing) {
+         if (call->call_id == TC_CALL_flush) {
+            /* always increment renderpass info for non-deferred flushes */
+            batch->tc->renderpass_info++;
+            /* if a flush happens, renderpass info is always incremented after */
+            first = false;
+         } else if (call->call_id == TC_CALL_set_framebuffer_state) {
+            /* the renderpass info pointer is already set at the start of the batch,
+             * so don't increment on the first set_framebuffer_state call
+             */
+            if (!first)
+               batch->tc->renderpass_info++;
+            first = false;
+         } else if (call->call_id >= TC_CALL_draw_single &&
+                    call->call_id <= TC_CALL_draw_vstate_multi) {
+            /* if a draw happens before a set_framebuffer_state on this batch,
+             * begin incrementing renderpass data 
+             */
+            first = false;
+         }
+      }
    }
+}
+
+static void
+tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
+{
+   struct tc_batch *batch = job;
+   struct pipe_context *pipe = batch->tc->pipe;
+   uint64_t *last = &batch->slots[batch->num_total_slots];
+
+   tc_batch_check(batch);
+   tc_set_driver_thread(batch->tc);
+
+   assert(!batch->token);
+
+   /* setup renderpass info */
+   batch->tc->renderpass_info = batch->renderpass_infos.data;
+
+   if (batch->tc->options.parse_renderpass_info)
+      batch_execute(batch, pipe, last, true);
+   else
+      batch_execute(batch, pipe, last, false);
 
    /* Add the fence to the list of fences for the driver to signal at the next
     * flush, which we use for tracking which buffers are referenced by
@@ -247,6 +382,7 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
    tc_batch_check(batch);
    batch->num_total_slots = 0;
    batch->last_mergeable_call = NULL;
+   batch->first_set_fb = false;
 }
 
 static void
@@ -267,7 +403,7 @@ tc_begin_next_buffer_list(struct threaded_context *tc)
 }
 
 static void
-tc_batch_flush(struct threaded_context *tc)
+tc_batch_flush(struct threaded_context *tc, bool full_copy)
 {
    struct tc_batch *next = &tc->batch_slots[tc->next];
 
@@ -281,12 +417,22 @@ tc_batch_flush(struct threaded_context *tc)
       next->token->tc = NULL;
       tc_unflushed_batch_token_reference(&next->token, NULL);
    }
+   /* reset renderpass info index for subsequent use */
+   next->renderpass_info_idx = -1;
 
    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                       NULL, 0);
    tc->last = tc->next;
    tc->next = (tc->next + 1) % TC_MAX_BATCHES;
    tc_begin_next_buffer_list(tc);
+
+   /* always increment renderpass info on batch flush;
+    * renderpass info can only be accessed by its owner batch during execution
+    */
+   if (tc->renderpass_info_recording) {
+      tc->batch_slots[tc->next].first_set_fb = full_copy;
+      tc_batch_increment_renderpass_info(tc, full_copy);
+   }
 }
 
 /* This is the function that adds variable-sized calls into the current
@@ -303,7 +449,8 @@ tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
    tc_debug_check(tc);
 
    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
-      tc_batch_flush(tc);
+      /* copy existing renderpass info during flush */
+      tc_batch_flush(tc, true);
       next = &tc->batch_slots[tc->next];
       tc_assert(next->num_total_slots == 0);
       tc_assert(next->last_mergeable_call == NULL);
@@ -406,6 +553,8 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char
 
    tc_debug_check(tc);
 
+   tc_signal_renderpass_info_ready(tc);
+
    /* Only wait for queued calls... */
    if (!util_queue_fence_is_signalled(&last->fence)) {
       util_queue_fence_wait(&last->fence);
@@ -438,6 +587,17 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char
 
    tc_debug_check(tc);
 
+   if (tc->options.parse_renderpass_info) {
+      int renderpass_info_idx = next->renderpass_info_idx;
+      if (renderpass_info_idx > 0) {
+         next->renderpass_info_idx = -1;
+         tc_batch_increment_renderpass_info(tc, false);
+      } else if (tc->renderpass_info_recording->has_draw) {
+         tc->renderpass_info_recording->data32[0] = 0;
+      }
+      tc->seen_fb_state = false;
+   }
+
    MESA_TRACE_END();
 }
 
@@ -466,7 +626,7 @@ threaded_context_flush(struct pipe_context *_pipe,
        * running. That should be better for cache locality.
        */
       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
-         tc_batch_flush(tc);
+         tc_batch_flush(tc, false);
       else
          tc_sync(token->tc);
    }
@@ -1097,11 +1257,33 @@ TC_CSO_WHOLE(blend)
 TC_CSO_WHOLE(rasterizer)
 TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
 TC_CSO_BIND(depth_stencil_alpha,
+   if (param && tc->options.parse_renderpass_info) {
+      /* dsa info is only ever added during a renderpass;
+       * changes outside of a renderpass reset the data
+       */
+      if (!tc->in_renderpass) {
+         tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
+         tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
+      }
+      /* let the driver parse its own state */
+      tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
+   }
 )
 TC_CSO_DELETE(depth_stencil_alpha)
 TC_CSO_WHOLE(compute)
 TC_CSO_CREATE(fs, shader)
 TC_CSO_BIND(fs,
+   if (param && tc->options.parse_renderpass_info) {
+      /* fs info is only ever added during a renderpass;
+       * changes outside of a renderpass reset the data
+       */
+      if (!tc->in_renderpass) {
+         tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
+         tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
+      }
+      /* let the driver parse its own state */
+      tc->options.fs_parse(param, tc_get_renderpass_info(tc));
+   }
 )
 TC_CSO_DELETE(fs)
 TC_CSO_SHADER(vs)
@@ -1199,10 +1381,48 @@ tc_set_framebuffer_state(struct pipe_context *_pipe,
    p->state.layers = fb->layers;
    p->state.nr_cbufs = nr_cbufs;
 
-   for (unsigned i = 0; i < nr_cbufs; i++) {
-      p->state.cbufs[i] = NULL;
-      pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
+
+   if (tc->options.parse_renderpass_info) {
+      /* store existing zsbuf data for possible persistence */
+      uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
+                      0 :
+                      tc->renderpass_info_recording->data8[3];
+      bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
+                           (fb->zsbuf ? fb->zsbuf->texture : NULL);
+
+      for (unsigned i = 0; i < nr_cbufs; i++) {
+         p->state.cbufs[i] = NULL;
+         pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
+         /* full tracking requires storing the fb attachment resources */
+         tc->fb_resources[i] = fb->cbufs[i] ? fb->cbufs[i]->texture : NULL;
+      }
+      memset(&tc->fb_resources[nr_cbufs], 0,
+             sizeof(void*) * (PIPE_MAX_COLOR_BUFS - nr_cbufs));
+
+      tc->fb_resources[PIPE_MAX_COLOR_BUFS] = fb->zsbuf ? fb->zsbuf->texture : NULL;
+      if (tc->seen_fb_state) {
+         /* this is the end of a renderpass, so increment the renderpass info */
+         tc_batch_increment_renderpass_info(tc, false);
+         /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
+          * keep zsbuf usage data
+          */
+         if (!zsbuf_changed)
+            tc->renderpass_info_recording->data8[3] = zsbuf;
+      } else {
+         /* this is the first time a set_framebuffer_call is triggered;
+          * just increment the index and keep using the existing info for recording
+          */
+         tc->batch_slots[tc->next].renderpass_info_idx = 0;
+      }
+      /* future fb state changes will increment the index */
+      tc->seen_fb_state = true;
+   } else {
+      for (unsigned i = 0; i < nr_cbufs; i++) {
+         p->state.cbufs[i] = NULL;
+         pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
+      }
    }
+   tc->in_renderpass = false;
    p->state.zsbuf = NULL;
    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
 }
@@ -3185,6 +3405,8 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
    struct pipe_screen *screen = pipe->screen;
    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
 
+   tc->in_renderpass = false;
+
    if (async && tc->options.create_fence) {
       if (fence) {
          struct tc_batch *next = &tc->batch_slots[tc->next];
@@ -3215,17 +3437,25 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
       p->fence = fence ? *fence : NULL;
       p->flags = flags | TC_FLUSH_ASYNC;
 
-      if (!(flags & PIPE_FLUSH_DEFERRED))
-         tc_batch_flush(tc);
+      if (!(flags & PIPE_FLUSH_DEFERRED)) {
+         /* non-deferred async flushes indicate completion of existing renderpass info */
+         tc_signal_renderpass_info_ready(tc);
+         tc_batch_flush(tc, false);
+         tc->seen_fb_state = false;
+      }
+
       return;
    }
 
 out_of_memory:
+   /* renderpass info is signaled during sync */
    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
 
-   if (!(flags & PIPE_FLUSH_DEFERRED))
+   if (!(flags & PIPE_FLUSH_DEFERRED)) {
       tc_flush_queries(tc);
+      tc->seen_fb_state = false;
+   }
    tc_set_driver_thread(tc);
    pipe->flush(pipe, fence, flags);
    tc_clear_driver_thread(tc);
@@ -3440,6 +3670,7 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
    struct threaded_context *tc = threaded_context(_pipe);
    unsigned index_size = info->index_size;
    bool has_user_indices = info->has_user_indices;
+   tc_parse_draw(tc);
 
    if (unlikely(indirect)) {
       assert(!has_user_indices);
@@ -3759,6 +3990,7 @@ tc_draw_vertex_state(struct pipe_context *_pipe,
                      unsigned num_draws)
 {
    struct threaded_context *tc = threaded_context(_pipe);
+   tc_parse_draw(tc);
 
    if (num_draws == 1) {
       /* Single draw. */
@@ -4053,6 +4285,18 @@ tc_invalidate_resource(struct pipe_context *_pipe,
    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
                                                tc_resource_call);
    tc_set_resource_reference(&call->resource, resource);
+
+   struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+   if (info) {
+      if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
+         info->zsbuf_invalidate = true;
+      } else {
+         for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+            if (tc->fb_resources[i] == resource)
+               info->cbuf_invalidate |= BITFIELD_BIT(i);
+         }
+      }
+   }
 }
 
 struct tc_clear {
@@ -4083,8 +4327,23 @@ tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor
    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
 
    p->buffers = buffers;
-   if (scissor_state)
+   if (scissor_state) {
       p->scissor_state = *scissor_state;
+      struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+      /* partial clear info is useful for drivers to know whether any zs writes occur;
+       * drivers are responsible for optimizing partial clear -> full clear
+       */
+      if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
+         info->zsbuf_clear_partial |= !info->zsbuf_clear;
+   } else {
+      struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+      if (info) {
+         /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
+         info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
+         if (buffers & PIPE_CLEAR_DEPTHSTENCIL && !info->zsbuf_load && !info->zsbuf_clear_partial)
+            info->zsbuf_clear = true;
+      }
+   }
    p->scissor_state_set = !!scissor_state;
    p->color = *color;
    p->depth = depth;
@@ -4480,6 +4739,7 @@ tc_destroy(struct pipe_context *_pipe)
 
       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
          util_queue_fence_destroy(&tc->batch_slots[i].fence);
+         util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
          assert(!tc->batch_slots[i].token);
       }
    }
@@ -4595,6 +4855,11 @@ threaded_context_create(struct pipe_context *pipe,
 #endif
       tc->batch_slots[i].tc = tc;
       util_queue_fence_init(&tc->batch_slots[i].fence);
+      tc->batch_slots[i].renderpass_info_idx = -1;
+      if (tc->options.parse_renderpass_info) {
+         util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
+         tc_batch_renderpass_infos_resize(&tc->batch_slots[i]);
+      }
    }
    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
@@ -4756,6 +5021,8 @@ threaded_context_create(struct pipe_context *pipe,
       *out = tc;
 
    tc_begin_next_buffer_list(tc);
+   if (tc->options.parse_renderpass_info)
+      tc_batch_increment_renderpass_info(tc, false);
    return &tc->base;
 
 fail:
@@ -4773,3 +5040,11 @@ threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned d
          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
    }
 }
+
+const struct tc_renderpass_info *
+threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait)
+{
+   if (tc->renderpass_info && wait)
+      util_queue_fence_wait(&tc->renderpass_info->ready);
+   return tc->renderpass_info;
+}
\ No newline at end of file
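
On the consumer side, a driver might read the accumulated metadata when it processes its framebuffer state and use it to pick renderpass load/store ops. The sketch below is only an illustration under assumptions: my_context, its tc member, and the hook wiring are hypothetical, while threaded_context_get_renderpass_info, tc_renderpass_info_is_zsbuf_used, BITFIELD_BIT, and the tc_renderpass_info fields come from the changes in this patch; per rule 1 in the header below, wait=true is used after a set_framebuffer_state callback.

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/macros.h"
#include "util/u_threaded_context.h"

/* hypothetical driver context -- 'tc' is the threaded_context wrapping it */
struct my_context {
   struct pipe_context base;
   struct threaded_context *tc;
};

/* installed as base.set_framebuffer_state at context creation */
static void
my_set_framebuffer_state(struct pipe_context *pctx,
                         const struct pipe_framebuffer_state *fb)
{
   struct my_context *ctx = (struct my_context *)pctx;
   /* runs on the driver thread; wait=true blocks until the recording side
    * has signaled that this renderpass's info is final
    */
   const struct tc_renderpass_info *info =
      threaded_context_get_renderpass_info(ctx->tc, true);

   for (unsigned i = 0; i < fb->nr_cbufs; i++) {
      if (info->cbuf_clear & BITFIELD_BIT(i)) {
         /* attachment is fully cleared: use a CLEAR load op */
      } else if (info->cbuf_load & BITFIELD_BIT(i)) {
         /* prior contents are used: LOAD */
      } else {
         /* contents never loaded: DONT_CARE */
      }
      if (info->cbuf_invalidate & BITFIELD_BIT(i)) {
         /* results are invalidated afterward: store can be skipped */
      }
   }

   if (fb->zsbuf && !tc_renderpass_info_is_zsbuf_used(info)) {
      /* depth/stencil is never tested, written, or fetched in this pass */
   }
}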
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 67296fc..e87b006 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
 #include "util/u_range.h"
 #include "util/u_thread.h"
 #include "util/slab.h"
+#include "util/u_dynarray.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -424,6 +425,63 @@ struct tc_unflushed_batch_token {
    struct threaded_context *tc;
 };
 
+struct tc_renderpass_info {
+   union {
+      struct {
+         /* bitmask of full-cleared color buffers */
+         uint8_t cbuf_clear;
+         /* bitmask of not-full-cleared color buffers */
+         uint8_t cbuf_load;
+         /* bitmask of color buffers that have their stores invalidated */
+         uint8_t cbuf_invalidate;
+         /* whether the zsbuf is full-cleared */
+         bool zsbuf_clear : 1;
+         /* whether the zsbuf is partial-cleared */
+         bool zsbuf_clear_partial : 1;
+         /* whether the zsbuf is not-full-cleared */
+         bool zsbuf_load : 1;
+         /* whether the zsbuf is invalidated */
+         bool zsbuf_invalidate : 1;
+         /* whether a draw occurs */
+         bool has_draw : 1;
+         uint8_t pad : 3;
+         /* 32 bits offset */
+         /* bitmask of color buffers using fbfetch */
+         uint8_t cbuf_fbfetch;
+         /* whether the fragment shader writes to the zsbuf */
+         bool zsbuf_write_fs : 1;
+         /* whether the DSA state writes to the zsbuf */
+         bool zsbuf_write_dsa : 1;
+         /* whether the DSA state reads the zsbuf */
+         bool zsbuf_read_dsa : 1;
+         /* whether the zsbuf is used for fbfetch */
+         bool zsbuf_fbfetch : 1;
+         uint8_t pad2 : 4;
+         uint16_t pad3;
+      };
+      uint64_t data;
+      /* fb info is in data32[0] */
+      uint32_t data32[2];
+      /* cso info is in data16[2] */
+      uint16_t data16[4];
+      /* zsbuf fb info is in data8[3] */
+      uint8_t data8[8];
+   };
+   /* determines whether the info can be "safely" read by drivers or if it may still be in use */
+   struct util_queue_fence ready;
+};
+
+static inline bool
+tc_renderpass_info_is_zsbuf_used(const struct tc_renderpass_info *info)
+{
+   return info->zsbuf_clear ||
+          info->zsbuf_clear_partial ||
+          info->zsbuf_write_fs ||
+          info->zsbuf_write_dsa ||
+          info->zsbuf_read_dsa ||
+          info->zsbuf_fbfetch;
+}
+
 struct tc_batch {
    struct threaded_context *tc;
 #if !defined(NDEBUG) && TC_DEBUG >= 1
@@ -431,6 +489,8 @@ struct tc_batch {
 #endif
    uint16_t num_total_slots;
    uint16_t buffer_list_index;
+   /* the index of the current renderpass info for recording */
+   int renderpass_info_idx;
 
    /* The last mergeable call that was added to this batch (i.e.
     * buffer subdata). This might be out-of-date or NULL.
@@ -438,8 +498,11 @@ struct tc_batch {
    struct tc_call_base *last_mergeable_call;
 
    struct util_queue_fence fence;
+   /* whether the first set_framebuffer_state call has been seen by this batch */
+   bool first_set_fb;
    struct tc_unflushed_batch_token *token;
    uint64_t slots[TC_SLOTS_PER_BATCH];
+   struct util_dynarray renderpass_infos;
 };
 
 struct tc_buffer_list {
@@ -468,6 +531,13 @@ struct threaded_context_options {
 
    /* If true, create_fence_fd doesn't access the context in the driver. */
    bool unsynchronized_create_fence_fd;
+   /* if true, parse and track renderpass info during execution */
+   bool parse_renderpass_info;
+   /* callbacks for drivers to read their DSA/FS state and update renderpass info accordingly
+    * note: drivers must ONLY append to renderpass info using |=
+    */
+   void (*dsa_parse)(void *state, struct tc_renderpass_info *info);
+   void (*fs_parse)(void *state, struct tc_renderpass_info *info);
 };
 
 struct threaded_context {
@@ -511,6 +581,10 @@ struct threaded_context {
    bool seen_tcs;
    bool seen_tes;
    bool seen_gs;
+   /* whether the current renderpass has seen a set_framebuffer_state call */
+   bool seen_fb_state;
+   /* whether a renderpass is currently active */
+   bool in_renderpass;
 
    bool seen_streamout_buffers;
    bool seen_shader_buffers[PIPE_SHADER_TYPES];
@@ -545,13 +619,33 @@ struct threaded_context {
 
    struct tc_batch batch_slots[TC_MAX_BATCHES];
    struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS];
+   /* the current framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */
+   struct pipe_resource *fb_resources[PIPE_MAX_COLOR_BUFS + 1];
+   /* accessed by main thread; preserves info across batches */
+   struct tc_renderpass_info *renderpass_info_recording;
+   /* accessed by driver thread */
+   struct tc_renderpass_info *renderpass_info;
 };
 
+
 void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage);
 void threaded_resource_deinit(struct pipe_resource *res);
 struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe);
 void tc_driver_internal_flush_notify(struct threaded_context *tc);
 
+/** function for getting the current renderpass info:
+ * - renderpass info is always valid
+ * - set 'wait=true' when calling during normal execution
+ * - set 'wait=false' when calling from a blocking flush
+ *
+ * Rules:
+ * 1) this must be called with 'wait=true' after the driver receives a pipe_context::set_framebuffer_state callback
+ * 2) this should be called with 'wait=false' when the driver receives a blocking pipe_context::flush call
+ * 3) this must not be used during any internal driver operations (e.g., u_blitter)
+ */
+const struct tc_renderpass_info *
+threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait);
+
 struct pipe_context *
 threaded_context_create(struct pipe_context *pipe,
                         struct slab_parent_pool *parent_transfer_pool,