#endif
}
+/* ensure the batch's array of renderpass data is large enough for the current index */
+static void
+tc_batch_renderpass_infos_resize(struct tc_batch *batch)
+{
+ unsigned size = batch->renderpass_infos.capacity;
+ unsigned cur_num = batch->renderpass_info_idx;
+
+ if (size / sizeof(struct tc_renderpass_info) > cur_num)
+ return;
+
+ if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_renderpass_info, cur_num + 10))
+ mesa_loge("tc: failed to allocate renderpass info array");
+
+ if (size != batch->renderpass_infos.capacity) {
+ /* zero new allocation region */
+ uint8_t *data = batch->renderpass_infos.data;
+ memset(data + size, 0, batch->renderpass_infos.capacity - size);
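+ /* each newly allocated info needs its 'ready' fence initialized before it can be waited on */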
+ unsigned start = size / sizeof(struct tc_renderpass_info);
+ unsigned count = (batch->renderpass_infos.capacity - size) /
+ sizeof(struct tc_renderpass_info);
+ struct tc_renderpass_info *infos = batch->renderpass_infos.data;
+ for (unsigned i = 0; i < count; i++)
+ util_queue_fence_init(&infos[start + i].ready);
+ }
+}
+
+/* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
+static void
+tc_signal_renderpass_info_ready(struct threaded_context *tc)
+{
+ if (tc->renderpass_info_recording &&
+ !util_queue_fence_is_signalled(&tc->renderpass_info_recording->ready))
+ util_queue_fence_signal(&tc->renderpass_info_recording->ready);
+}
+
+/* increment the current renderpass info struct for recording
+ * 'full_copy' is used for preserving data across non-blocking tc batch flushes
+ */
+static void
+tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy)
+{
+ struct tc_batch *batch = &tc->batch_slots[tc->next];
+ struct tc_renderpass_info *tc_info = batch->renderpass_infos.data;
+
+ /* signal existing info since it will not be used anymore */
+ tc_signal_renderpass_info_ready(tc);
+ /* increment rp info and initialize it */
+ batch->renderpass_info_idx++;
+ tc_batch_renderpass_infos_resize(batch);
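+ /* the resize may have reallocated the dynarray, so reload the pointer */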
+ tc_info = batch->renderpass_infos.data;
+
+ if (full_copy) {
+ /* copy the previous data in its entirety: this is still the same renderpass */
+ if (tc->renderpass_info_recording)
+ tc_info[batch->renderpass_info_idx].data = tc->renderpass_info_recording->data;
+ else
+ tc_info[batch->renderpass_info_idx].data = 0;
+ } else {
+ /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
+ tc_info[batch->renderpass_info_idx].data = 0;
+ if (tc->renderpass_info_recording)
+ tc_info[batch->renderpass_info_idx].data16[2] = tc->renderpass_info_recording->data16[2];
+ }
+
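+ /* an unsignaled fence means the info is still being recorded; drivers wait on it before reading */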
+ util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
+ assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx]);
+ /* this is now the current recording renderpass info */
+ tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx];
+}
+
+static ALWAYS_INLINE struct tc_renderpass_info *
+tc_get_renderpass_info(struct threaded_context *tc)
+{
+ return tc->renderpass_info_recording;
+}
+
+/* update metadata at draw time */
+static void
+tc_parse_draw(struct threaded_context *tc)
+{
+ struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+
+ if (info) {
+ /* all buffers that aren't cleared are considered loaded */
+ info->cbuf_load |= ~info->cbuf_clear;
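+ /* e.g., cbuf_clear = 0x1 -> cbuf_load |= 0xfe: cbuf0 keeps its clear, all others become loads */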
+ if (!info->zsbuf_clear)
+ info->zsbuf_load = true;
+ /* previous invalidates are no longer relevant */
+ info->cbuf_invalidate = 0;
+ info->zsbuf_invalidate = false;
+ info->has_draw = true;
+ }
+
+ tc->in_renderpass = true;
+ tc->seen_fb_state = true;
+}
+
static void *
to_call_check(void *ptr, unsigned num_slots)
{
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
offsetof(struct pipe_draw_info, min_index)
-static void
-tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
+ALWAYS_INLINE static void
+batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
{
- struct tc_batch *batch = job;
- struct pipe_context *pipe = batch->tc->pipe;
- uint64_t *last = &batch->slots[batch->num_total_slots];
-
- tc_batch_check(batch);
- tc_set_driver_thread(batch->tc);
-
- assert(!batch->token);
-
+ /* if the framebuffer state is persisting from a previous batch,
+ * begin incrementing renderpass info on the first set_framebuffer_state call
+ */
+ bool first = !batch->first_set_fb;
for (uint64_t *iter = batch->slots; iter != last;) {
struct tc_call_base *call = (struct tc_call_base *)iter;
TC_TRACE_SCOPE(call->call_id);
iter += execute_func[call->call_id](pipe, call, last);
+
+ if (parsing) {
+ if (call->call_id == TC_CALL_flush) {
+ /* always increment renderpass info for non-deferred flushes */
+ batch->tc->renderpass_info++;
+ /* if a flush happens, renderpass info is always incremented after */
+ first = false;
+ } else if (call->call_id == TC_CALL_set_framebuffer_state) {
+ /* the renderpass info pointer is already set at the start of the batch,
+ * so don't increment on the first set_framebuffer_state call
+ */
+ if (!first)
+ batch->tc->renderpass_info++;
+ first = false;
+ } else if (call->call_id >= TC_CALL_draw_single &&
+ call->call_id <= TC_CALL_draw_vstate_multi) {
+ /* if a draw happens before a set_framebuffer_state on this batch,
+ * begin incrementing renderpass data
+ */
+ first = false;
+ }
+ }
}
+}
+
+static void
+tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
+{
+ struct tc_batch *batch = job;
+ struct pipe_context *pipe = batch->tc->pipe;
+ uint64_t *last = &batch->slots[batch->num_total_slots];
+
+ tc_batch_check(batch);
+ tc_set_driver_thread(batch->tc);
+
+ assert(!batch->token);
+
+ /* setup renderpass info */
+ batch->tc->renderpass_info = batch->renderpass_infos.data;
+
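+ /* 'parsing' is constant in each inlined copy, so the per-call checks below are compiled out of the non-parsing path */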
+ if (batch->tc->options.parse_renderpass_info)
+ batch_execute(batch, pipe, last, true);
+ else
+ batch_execute(batch, pipe, last, false);
/* Add the fence to the list of fences for the driver to signal at the next
* flush, which we use for tracking which buffers are referenced by
tc_batch_check(batch);
batch->num_total_slots = 0;
batch->last_mergeable_call = NULL;
+ batch->first_set_fb = false;
}
static void
}
static void
-tc_batch_flush(struct threaded_context *tc)
+tc_batch_flush(struct threaded_context *tc, bool full_copy)
{
struct tc_batch *next = &tc->batch_slots[tc->next];
next->token->tc = NULL;
tc_unflushed_batch_token_reference(&next->token, NULL);
}
+ /* reset renderpass info index for subsequent use */
+ next->renderpass_info_idx = -1;
util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
NULL, 0);
tc->last = tc->next;
tc->next = (tc->next + 1) % TC_MAX_BATCHES;
tc_begin_next_buffer_list(tc);
+
+ /* renderpass info must be incremented on every batch flush while recording is active;
+ * an info can only be accessed by its owner batch during execution
+ */
+ if (tc->renderpass_info_recording) {
+ tc->batch_slots[tc->next].first_set_fb = full_copy;
+ tc_batch_increment_renderpass_info(tc, full_copy);
+ }
}
/* This is the function that adds variable-sized calls into the current
tc_debug_check(tc);
if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
- tc_batch_flush(tc);
+ /* copy existing renderpass info during flush */
+ tc_batch_flush(tc, true);
next = &tc->batch_slots[tc->next];
tc_assert(next->num_total_slots == 0);
tc_assert(next->last_mergeable_call == NULL);
tc_debug_check(tc);
+ tc_signal_renderpass_info_ready(tc);
+
/* Only wait for queued calls... */
if (!util_queue_fence_is_signalled(&last->fence)) {
util_queue_fence_wait(&last->fence);
tc_debug_check(tc);
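+ /* syncing ends renderpass recording for this batch: if the index advanced,
+ * restart recording on a fresh info; otherwise, if the current recording
+ * already saw a draw, scrub its fb data (data32[0]) since it is stale
+ */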
+ if (tc->options.parse_renderpass_info) {
+ int renderpass_info_idx = next->renderpass_info_idx;
+ if (renderpass_info_idx > 0) {
+ next->renderpass_info_idx = -1;
+ tc_batch_increment_renderpass_info(tc, false);
+ } else if (tc->renderpass_info_recording->has_draw) {
+ tc->renderpass_info_recording->data32[0] = 0;
+ }
+ tc->seen_fb_state = false;
+ }
+
MESA_TRACE_END();
}
* running. That should be better for cache locality.
*/
if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
- tc_batch_flush(tc);
+ tc_batch_flush(tc, false);
else
tc_sync(token->tc);
}
TC_CSO_WHOLE(rasterizer)
TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
TC_CSO_BIND(depth_stencil_alpha,
+ if (param && tc->options.parse_renderpass_info) {
+ /* dsa info is only ever added during a renderpass;
+ * changes outside of a renderpass reset the data
+ */
+ if (!tc->in_renderpass) {
+ tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
+ tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
+ }
+ /* let the driver parse its own state */
+ tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
+ }
)
TC_CSO_DELETE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_CREATE(fs, shader)
TC_CSO_BIND(fs,
+ if (param && tc->options.parse_renderpass_info) {
+ /* fs info is only ever added during a renderpass;
+ * changes outside of a renderpass reset the data
+ */
+ if (!tc->in_renderpass) {
+ tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
+ tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
+ }
+ /* let the driver parse its own state */
+ tc->options.fs_parse(param, tc_get_renderpass_info(tc));
+ }
)
TC_CSO_DELETE(fs)
TC_CSO_SHADER(vs)
p->state.layers = fb->layers;
p->state.nr_cbufs = nr_cbufs;
- for (unsigned i = 0; i < nr_cbufs; i++) {
- p->state.cbufs[i] = NULL;
- pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
+
+ if (tc->options.parse_renderpass_info) {
+ /* store existing zsbuf data for possible persistence */
+ uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
+ 0 :
+ tc->renderpass_info_recording->data8[3];
+ bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
+ (fb->zsbuf ? fb->zsbuf->texture : NULL);
+
+ for (unsigned i = 0; i < nr_cbufs; i++) {
+ p->state.cbufs[i] = NULL;
+ pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
+ /* full tracking requires storing the fb attachment resources */
+ tc->fb_resources[i] = fb->cbufs[i] ? fb->cbufs[i]->texture : NULL;
+ }
+ memset(&tc->fb_resources[nr_cbufs], 0,
+ sizeof(void*) * (PIPE_MAX_COLOR_BUFS - nr_cbufs));
+
+ tc->fb_resources[PIPE_MAX_COLOR_BUFS] = fb->zsbuf ? fb->zsbuf->texture : NULL;
+ if (tc->seen_fb_state) {
+ /* this is the end of a renderpass, so increment the renderpass info */
+ tc_batch_increment_renderpass_info(tc, false);
+ /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
+ * keep zsbuf usage data
+ */
+ if (!zsbuf_changed)
+ tc->renderpass_info_recording->data8[3] = zsbuf;
+ } else {
+ /* this is the first time a set_framebuffer_state call is triggered;
+ * just increment the index and keep using the existing info for recording
+ */
+ tc->batch_slots[tc->next].renderpass_info_idx = 0;
+ }
+ /* future fb state changes will increment the index */
+ tc->seen_fb_state = true;
+ } else {
+ for (unsigned i = 0; i < nr_cbufs; i++) {
+ p->state.cbufs[i] = NULL;
+ pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
+ }
}
+ tc->in_renderpass = false;
p->state.zsbuf = NULL;
pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
}
struct pipe_screen *screen = pipe->screen;
bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
+ tc->in_renderpass = false;
+
if (async && tc->options.create_fence) {
if (fence) {
struct tc_batch *next = &tc->batch_slots[tc->next];
p->fence = fence ? *fence : NULL;
p->flags = flags | TC_FLUSH_ASYNC;
- if (!(flags & PIPE_FLUSH_DEFERRED))
- tc_batch_flush(tc);
+ if (!(flags & PIPE_FLUSH_DEFERRED)) {
+ /* non-deferred async flushes indicate completion of existing renderpass info */
+ tc_signal_renderpass_info_ready(tc);
+ tc_batch_flush(tc, false);
+ tc->seen_fb_state = false;
+ }
+
return;
}
out_of_memory:
+ /* renderpass info is signaled during sync */
tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
- if (!(flags & PIPE_FLUSH_DEFERRED))
+ if (!(flags & PIPE_FLUSH_DEFERRED)) {
tc_flush_queries(tc);
+ tc->seen_fb_state = false;
+ }
tc_set_driver_thread(tc);
pipe->flush(pipe, fence, flags);
tc_clear_driver_thread(tc);
struct threaded_context *tc = threaded_context(_pipe);
unsigned index_size = info->index_size;
bool has_user_indices = info->has_user_indices;
+ tc_parse_draw(tc);
if (unlikely(indirect)) {
assert(!has_user_indices);
unsigned num_draws)
{
struct threaded_context *tc = threaded_context(_pipe);
+ tc_parse_draw(tc);
if (num_draws == 1) {
/* Single draw. */
struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
tc_resource_call);
tc_set_resource_reference(&call->resource, resource);
+
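+ /* if the invalidated resource is a current fb attachment, flag it so drivers can elide the store */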
+ struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+ if (info) {
+ if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
+ info->zsbuf_invalidate = true;
+ } else {
+ for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (tc->fb_resources[i] == resource)
+ info->cbuf_invalidate |= BITFIELD_BIT(i);
+ }
+ }
+ }
}
struct tc_clear {
struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
p->buffers = buffers;
- if (scissor_state)
+ if (scissor_state) {
p->scissor_state = *scissor_state;
+ struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+ /* partial clear info is useful for drivers to know whether any zs writes occur;
+ * drivers are responsible for optimizing partial clear -> full clear
+ */
+ if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
+ info->zsbuf_clear_partial |= !info->zsbuf_clear;
+ } else {
+ struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
+ if (info) {
+ /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
+ info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
+ if (buffers & PIPE_CLEAR_DEPTHSTENCIL && !info->zsbuf_load && !info->zsbuf_clear_partial)
+ info->zsbuf_clear = true;
+ }
+ }
p->scissor_state_set = !!scissor_state;
p->color = *color;
p->depth = depth;
for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
util_queue_fence_destroy(&tc->batch_slots[i].fence);
+ util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
assert(!tc->batch_slots[i].token);
}
}
#endif
tc->batch_slots[i].tc = tc;
util_queue_fence_init(&tc->batch_slots[i].fence);
+ tc->batch_slots[i].renderpass_info_idx = -1;
+ if (tc->options.parse_renderpass_info) {
+ util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
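+ /* the index is still -1 here, so this resize just preallocates the initial block of infos */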
+ tc_batch_renderpass_infos_resize(&tc->batch_slots[i]);
+ }
}
for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
*out = tc;
tc_begin_next_buffer_list(tc);
+ if (tc->options.parse_renderpass_info)
+ tc_batch_increment_renderpass_info(tc, false);
return &tc->base;
fail:
tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
}
}
+
+const struct tc_renderpass_info *
+threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait)
+{
+ if (tc->renderpass_info && wait)
+ util_queue_fence_wait(&tc->renderpass_info->ready);
+ return tc->renderpass_info;
+}
\ No newline at end of file
#include "util/u_range.h"
#include "util/u_thread.h"
#include "util/slab.h"
+#include "util/u_dynarray.h"
#ifdef __cplusplus
extern "C" {
struct threaded_context *tc;
};
+struct tc_renderpass_info {
+ union {
+ struct {
+ /* bitmask of full-cleared color buffers */
+ uint8_t cbuf_clear;
+ /* bitmask of not-full-cleared color buffers */
+ uint8_t cbuf_load;
+ /* bitmask of color buffers that have their stores invalidated */
+ uint8_t cbuf_invalidate;
+ /* whether the zsbuf is full-cleared */
+ bool zsbuf_clear : 1;
+ /* whether the zsbuf is partial-cleared */
+ bool zsbuf_clear_partial : 1;
+ /* whether the zsbuf is not-full-cleared */
+ bool zsbuf_load : 1;
+ /* whether the zsbuf is invalidated */
+ bool zsbuf_invalidate : 1;
+ /* whether a draw occurs */
+ bool has_draw : 1;
+ uint8_t pad : 3;
+ /* 32 bits offset */
+ /* bitmask of color buffers using fbfetch */
+ uint8_t cbuf_fbfetch;
+ /* whether the fragment shader writes to the zsbuf */
+ bool zsbuf_write_fs : 1;
+ /* whether the DSA state writes to the zsbuf */
+ bool zsbuf_write_dsa : 1;
+ /* whether the DSA state reads the zsbuf */
+ bool zsbuf_read_dsa : 1;
+ /* whether the zsbuf is used for fbfetch */
+ bool zsbuf_fbfetch : 1;
+ uint8_t pad2 : 4;
+ uint16_t pad3;
+ };
+ uint64_t data;
+ /* fb info is in data32[0] */
+ uint32_t data32[2];
+ /* cso info is in data16[2] */
+ uint16_t data16[4];
+ /* zsbuf fb info is in data8[3] */
+ uint8_t data8[8];
+ };
+ /* determines whether the info can be "safely" read by drivers or if it may still be in use */
+ struct util_queue_fence ready;
+};
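+/* byte layout of the aliased union views (members fill bytes in declaration order):
+ *   data8[0] cbuf_clear         data8[4] cbuf_fbfetch
+ *   data8[1] cbuf_load          data8[5] zsbuf cso bits (fs/dsa/fbfetch)
+ *   data8[2] cbuf_invalidate    data8[6..7] pad3
+ *   data8[3] zsbuf fb bits (clear/partial/load/invalidate/has_draw)
+ * hence data32[0] aliases the fb state and data16[2] aliases the cso state
+ */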
+
+static inline bool
+tc_renderpass_info_is_zsbuf_used(const struct tc_renderpass_info *info)
+{
+ return info->zsbuf_clear ||
+ info->zsbuf_clear_partial ||
+ info->zsbuf_write_fs ||
+ info->zsbuf_write_dsa ||
+ info->zsbuf_read_dsa ||
+ info->zsbuf_fbfetch;
+}
+
struct tc_batch {
struct threaded_context *tc;
#if !defined(NDEBUG) && TC_DEBUG >= 1
#endif
uint16_t num_total_slots;
uint16_t buffer_list_index;
+ /* the index of the current renderpass info for recording */
+ int renderpass_info_idx;
/* The last mergeable call that was added to this batch (i.e.
* buffer subdata). This might be out-of-date or NULL.
struct tc_call_base *last_mergeable_call;
struct util_queue_fence fence;
+ /* whether the first set_framebuffer_state call has been seen by this batch */
+ bool first_set_fb;
struct tc_unflushed_batch_token *token;
uint64_t slots[TC_SLOTS_PER_BATCH];
+ struct util_dynarray renderpass_infos;
};
struct tc_buffer_list {
/* If true, create_fence_fd doesn't access the context in the driver. */
bool unsynchronized_create_fence_fd;
+ /* if true, parse and track renderpass info during execution */
+ bool parse_renderpass_info;
+ /* callbacks for drivers to read their DSA/FS state and update renderpass info accordingly
+ * note: drivers must ONLY append to renderpass info using |=
+ */
+ void (*dsa_parse)(void *state, struct tc_renderpass_info *info);
+ void (*fs_parse)(void *state, struct tc_renderpass_info *info);
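+ /* a minimal parse callback sketch (driver state fields are illustrative):
+ *
+ *    static void driver_dsa_parse(void *state, struct tc_renderpass_info *info)
+ *    {
+ *       const struct driver_dsa_state *dsa = state;
+ *       info->zsbuf_write_dsa |= dsa->writes_depth || dsa->writes_stencil;
+ *       info->zsbuf_read_dsa |= dsa->reads_depth || dsa->reads_stencil;
+ *    }
+ */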
};
struct threaded_context {
bool seen_tcs;
bool seen_tes;
bool seen_gs;
+ /* whether the current renderpass has seen a set_framebuffer_state call */
+ bool seen_fb_state;
+ /* whether a renderpass is currently active */
+ bool in_renderpass;
bool seen_streamout_buffers;
bool seen_shader_buffers[PIPE_SHADER_TYPES];
struct tc_batch batch_slots[TC_MAX_BATCHES];
struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS];
+ /* the current framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */
+ struct pipe_resource *fb_resources[PIPE_MAX_COLOR_BUFS + 1];
+ /* accessed by main thread; preserves info across batches */
+ struct tc_renderpass_info *renderpass_info_recording;
+ /* accessed by driver thread */
+ struct tc_renderpass_info *renderpass_info;
};
+
void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage);
void threaded_resource_deinit(struct pipe_resource *res);
struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe);
void tc_driver_internal_flush_notify(struct threaded_context *tc);
+/** function for getting the current renderpass info:
+ * - renderpass info is always valid
+ * - set 'wait=true' when calling during normal execution
+ * - set 'wait=false' when calling from flush
+ *
+ * Rules:
+ * 1) this must be called with 'wait=true' after the driver receives a pipe_context::set_framebuffer_state callback
+ * 2) this should be called with 'wait=false' when the driver receives a blocking pipe_context::flush call
+ * 3) this must not be used during any internal driver operations (e.g., u_blitter)
+ */
+const struct tc_renderpass_info *
+threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait);
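+/* hypothetical usage sketch (driver names are illustrative, not part of this interface):
+ *
+ *    static void driver_set_framebuffer_state(struct pipe_context *pctx, ...)
+ *    {
+ *       struct driver_context *ctx = driver_context(pctx);
+ *       const struct tc_renderpass_info *info =
+ *          threaded_context_get_renderpass_info(ctx->tc, true);
+ *       if (!tc_renderpass_info_is_zsbuf_used(info))
+ *          ctx->rp_skip_zsbuf_store = true;
+ *    }
+ */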
+
struct pipe_context *
threaded_context_create(struct pipe_context *pipe,
struct slab_parent_pool *parent_transfer_pool,