The main motivation is to improve the score of viewperf13/snx.
This new interface is designed to be optimal for display lists as implemented
by the vbo module. It has much lower CPU overhead in the frontend, threaded
context, and the driver.
Reviewed-by: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13050>
* ``PIPE_CAP_SUPPORTED_PRIM_MODES``: A bitmask of the ``pipe_prim_type`` enum values that the driver can natively support.
* ``PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART``: A bitmask of the ``pipe_prim_type`` enum values that the driver can natively support for primitive restart. Only useful if ``PIPE_CAP_PRIMITIVE_RESTART`` is also exported.
* ``PIPE_CAP_PREFER_BACK_BUFFER_REUSE``: Only applies to DRI_PRIME. If 1, the driver prefers that DRI3 tries to use the same back buffer each frame. If 0, this means DRI3 will at least use 2 back buffers and ping-pong between them to allow the tiled->linear copy to run in parallel.
+* ``PIPE_CAP_DRAW_VERTEX_STATE``: Driver supports ``pipe_screen::create_vertex_state``/``vertex_state_destroy`` and ``pipe_context::draw_vertex_state``. Only used by display lists and designed to serve vbo_save.
.. _pipe_capf:
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
+#include "util/u_helpers.h"
#include "util/u_upload_mgr.h"
#include "util/u_threaded_context.h"
#include "noop_public.h"
external_only, count);
}
+/**
+ * Noop-driver implementation of pipe_screen::create_vertex_state.
+ *
+ * Allocates a pipe_vertex_state with CALLOC_STRUCT and hands it to
+ * util_init_pipe_vertex_state() together with all creation parameters.
+ *
+ * \return the newly created state, or NULL if the allocation failed.
+ */
+static struct pipe_vertex_state *
+noop_create_vertex_state(struct pipe_screen *screen,
+                         struct pipe_vertex_buffer *buffer,
+                         const struct pipe_vertex_element *elements,
+                         unsigned num_elements,
+                         struct pipe_resource *indexbuf,
+                         uint32_t full_velem_mask)
+{
+   struct pipe_vertex_state *vstate = CALLOC_STRUCT(pipe_vertex_state);
+
+   /* Only initialize when the allocation succeeded; NULL is passed through. */
+   if (vstate) {
+      util_init_pipe_vertex_state(screen, buffer, elements, num_elements,
+                                  indexbuf, full_velem_mask, vstate);
+   }
+
+   return vstate;
+}
+
+/**
+ * Noop-driver implementation of pipe_screen::vertex_state_destroy.
+ *
+ * Releases the vertex buffer and index buffer held in state->input and then
+ * frees the state object itself. The screen argument is not used.
+ */
+static void noop_vertex_state_destroy(struct pipe_screen *screen,
+ struct pipe_vertex_state *state)
+{
+ pipe_vertex_buffer_unreference(&state->input.vbuffer);
+ pipe_resource_reference(&state->input.indexbuf, NULL);
+ FREE(state);
+}
+
struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen)
{
struct noop_pipe_screen *noop_screen;
screen->get_device_uuid = noop_get_device_uuid;
screen->query_dmabuf_modifiers = noop_query_dmabuf_modifiers;
screen->resource_create_with_modifiers = noop_resource_create_with_modifiers;
+ screen->create_vertex_state = noop_create_vertex_state;
+ screen->vertex_state_destroy = noop_vertex_state_destroy;
slab_create_parent(&noop_screen->pool_transfers,
sizeof(struct pipe_transfer), 64);
{
}
+/**
+ * Noop-driver implementation of pipe_context::draw_vertex_state.
+ *
+ * The body is empty: none of the arguments are read and nothing is drawn.
+ */
+static void noop_draw_vertex_state(struct pipe_context *ctx,
+ struct pipe_vertex_state *state,
+ uint32_t partial_velem_mask,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws)
+{
+}
+
static void noop_launch_grid(struct pipe_context *ctx,
const struct pipe_grid_info *info)
{
ctx->sampler_view_destroy = noop_sampler_view_destroy;
ctx->surface_destroy = noop_surface_destroy;
ctx->draw_vbo = noop_draw_vbo;
+ ctx->draw_vertex_state = noop_draw_vertex_state;
ctx->launch_grid = noop_launch_grid;
ctx->create_stream_output_target = noop_create_stream_output_target;
ctx->stream_output_target_destroy = noop_stream_output_target_destroy;
}
return false;
}
+
+/**
+ * Fill in a caller-allocated pipe_vertex_state from its creation parameters.
+ *
+ * The reference count is initialized to 1, the owning screen is recorded,
+ * the vertex buffer and index buffer are stored through the reference
+ * helpers, and the vertex elements are copied into state->input.elements.
+ *
+ * NOTE(review): state->input.vbuffer and state->input.indexbuf are used as
+ * destinations of the *_reference helpers, which presumably release whatever
+ * they pointed to before; callers should therefore pass zero-initialized
+ * storage -- confirm against the helper implementations.
+ *
+ * \param screen           stored in state->screen
+ * \param buffer           vertex buffer to reference
+ * \param elements         array of at least num_elements vertex elements
+ * \param num_elements     number of elements to copy; must equal the number
+ *                         of bits set in full_velem_mask and must not exceed
+ *                         PIPE_MAX_ATTRIBS
+ * \param indexbuf         index buffer to reference
+ * \param full_velem_mask  stored in state->input.full_velem_mask
+ * \param state            the object to initialize
+ */
+void
+util_init_pipe_vertex_state(struct pipe_screen *screen,
+                            struct pipe_vertex_buffer *buffer,
+                            const struct pipe_vertex_element *elements,
+                            unsigned num_elements,
+                            struct pipe_resource *indexbuf,
+                            uint32_t full_velem_mask,
+                            struct pipe_vertex_state *state)
+{
+   assert(num_elements == util_bitcount(full_velem_mask));
+   /* state->input.elements[] holds PIPE_MAX_ATTRIBS entries; the copy loop
+    * below must never run past its end.
+    */
+   assert(num_elements <= PIPE_MAX_ATTRIBS);
+
+   pipe_reference_init(&state->reference, 1);
+   state->screen = screen;
+
+   pipe_vertex_buffer_reference(&state->input.vbuffer, buffer);
+   pipe_resource_reference(&state->input.indexbuf, indexbuf);
+   state->input.num_elements = num_elements;
+   for (unsigned i = 0; i < num_elements; i++)
+      state->input.elements[i] = elements[i];
+   state->input.full_velem_mask = full_velem_mask;
+}
bool
util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize, uint32_t *clamped);
+/**
+ * Initialize the caller-provided \p state from a vertex buffer, an array of
+ * vertex elements, and an index buffer.
+ *
+ * \p num_elements is expected to equal the number of bits set in
+ * \p full_velem_mask.
+ */
+void
+util_init_pipe_vertex_state(struct pipe_screen *screen,
+ struct pipe_vertex_buffer *buffer,
+ const struct pipe_vertex_element *elements,
+ unsigned num_elements,
+ struct pipe_resource *indexbuf,
+ uint32_t full_velem_mask,
+ struct pipe_vertex_state *state);
+
#ifdef __cplusplus
}
#endif
}
+/**
+ * Make *dst point to src, updating reference counts through pipe_reference().
+ *
+ * Either *dst or src may be NULL. When pipe_reference() reports that the
+ * previously referenced state must be destroyed, it is destroyed through the
+ * vertex_state_destroy hook of the screen recorded in that state.
+ */
static inline void
+pipe_vertex_state_reference(struct pipe_vertex_state **dst,
+                            struct pipe_vertex_state *src)
+{
+   struct pipe_vertex_state *prev = *dst;
+   struct pipe_reference *prev_ref = prev ? &prev->reference : NULL;
+   struct pipe_reference *next_ref = src ? &src->reference : NULL;
+
+   if (pipe_reference(prev_ref, next_ref)) {
+      struct pipe_screen *owner = prev->screen;
+
+      owner->vertex_state_destroy(owner, prev);
+   }
+
+   *dst = src;
+}
+
+static inline void
pipe_vertex_buffer_unreference(struct pipe_vertex_buffer *dst)
{
if (dst->is_user_buffer)
struct pipe_draw_info info;
STATIC_ASSERT(sizeof(info.mode) == 1);
+ struct pipe_draw_vertex_state_info dvs_info;
+ STATIC_ASSERT(sizeof(dvs_info.mode) == 1);
+
static const struct debug_named_value names[] = {
DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS),
DEBUG_NAMED_VALUE(PIPE_PRIM_LINES),
return 1;
case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART:
+ case PIPE_CAP_DRAW_VERTEX_STATE:
return 0;
default:
struct pipe_draw_info;
struct pipe_draw_indirect_info;
struct pipe_draw_start_count_bias;
+struct pipe_draw_vertex_state_info;
struct pipe_grid_info;
struct pipe_fence_handle;
struct pipe_framebuffer_state;
struct pipe_transfer;
struct pipe_vertex_buffer;
struct pipe_vertex_element;
+struct pipe_vertex_state;
struct pipe_video_buffer;
struct pipe_video_codec;
struct pipe_viewport_state;
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws);
+
+ /**
+ * Multi draw for display lists.
+ *
+ * For more information, see pipe_vertex_state and
+ * pipe_draw_vertex_state_info.
+ *
+ * Explanation of partial_velem_mask:
+ *
+ * 1. pipe_vertex_state::input::elements have a monotonic logical index
+ * determined by pipe_vertex_state::input::full_velem_mask, specifically,
+ * the position of the i-th bit set is the logical index of the i-th
+ * vertex element, up to 31.
+ *
+ * 2. partial_velem_mask (the parameter of this function; it is not a
+ * member of pipe_vertex_state) is a subset of full_velem_mask where
+ * the bits set determine which vertex elements should be bound
+ * contiguously. The vertex elements corresponding to the bits not set
+ * in partial_velem_mask should be ignored.
+ *
+ * Those two allow creating pipe_vertex_state that has more vertex
+ * attributes than the vertex shader has inputs. The idea is that
+ * pipe_vertex_state can be used with any vertex shader that has the same
+ * number of inputs and same logical indices or less. This may sound like
+ * an overly complicated way to bind a subset of vertex elements, but it
+ * actually simplifies everything else:
+ *
+ * - In st/mesa, full_velem_mask is exactly the mask of enabled vertex
+ * attributes (VERT_ATTRIB_x) in the display list VAO, while
+ * partial_velem_mask is exactly the inputs_read mask of the vertex
+ * shader (also VERT_ATTRIB_x).
+ *
+ * - In the driver, only some bit ops and popcnt are needed to assemble
+ * vertex elements very quickly.
+ *
+ * \param ctx pipe context
+ * \param state vertex state object to draw with
+ * \param partial_velem_mask subset of state->input.full_velem_mask,
+ * see above
+ * \param info primitive mode and ownership flag, passed by value
+ * \param draws the draws to execute (presumably an array of
+ * num_draws entries, as for draw_vbo -- confirm)
+ * \param num_draws number of draws
+ */
+ void (*draw_vertex_state)(struct pipe_context *ctx,
+ struct pipe_vertex_state *state,
+ uint32_t partial_velem_mask,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ unsigned num_draws);
/*@}*/
/**
PIPE_CAP_SUPPORTED_PRIM_MODES,
PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART,
PIPE_CAP_PREFER_BACK_BUFFER_REUSE,
+ PIPE_CAP_DRAW_VERTEX_STATE,
PIPE_CAP_LAST,
/* XXX do not add caps after PIPE_CAP_LAST! */
struct pipe_transfer;
struct pipe_box;
struct pipe_memory_info;
+struct pipe_vertex_buffer;
+struct pipe_vertex_element;
+struct pipe_vertex_state;
struct disk_cache;
struct driOptionCache;
struct u_transfer_helper;
+struct pipe_screen;
+
+/**
+ * Function pointer type of pipe_screen::create_vertex_state.
+ *
+ * Takes one vertex buffer, an array of num_elements vertex elements, an
+ * index buffer, and full_velem_mask, and returns a pipe_vertex_state pointer.
+ */
+typedef struct pipe_vertex_state *
+ (*pipe_create_vertex_state_func)(struct pipe_screen *screen,
+ struct pipe_vertex_buffer *buffer,
+ const struct pipe_vertex_element *elements,
+ unsigned num_elements,
+ struct pipe_resource *indexbuf,
+ uint32_t full_velem_mask);
+/**
+ * Function pointer type of pipe_screen::vertex_state_destroy; the second
+ * argument is the state object to destroy.
+ */
+typedef void (*pipe_vertex_state_destroy_func)(struct pipe_screen *screen,
+ struct pipe_vertex_state *);
/**
* Gallium screen/adapter context. Basically everything
unsigned int (*get_dmabuf_modifier_planes)(struct pipe_screen *screen,
uint64_t modifier,
enum pipe_format format);
+
+ /**
+ * Vertex state CSO functions for precomputing vertex and index buffer
+ * states for display lists.
+ */
+ pipe_create_vertex_state_func create_vertex_state;
+ pipe_vertex_state_destroy_func vertex_state_destroy;
};
unsigned instance_divisor;
};
+/**
+ * Opaque refcounted constant state object encapsulating a vertex buffer,
+ * index buffer, and vertex elements. Used by display lists to bind those
+ * states and pass buffer references quickly.
+ *
+ * The state contains 1 index buffer, 0 or 1 vertex buffer, and 0 or more
+ * vertex elements.
+ *
+ * Constraints on the buffers to get the fastest codepath:
+ * - All buffer contents are considered immutable and read-only after
+ * initialization. This implies the following things.
+ * - No place is required to track whether these buffers are busy.
+ * - All CPU mappings of these buffers can be forced to UNSYNCHRONIZED by
+ * both drivers and common code unconditionally.
+ * - Buffer invalidation can be skipped by both drivers and common code
+ * unconditionally.
+ */
+struct pipe_vertex_state {
+ struct pipe_reference reference; /* initialized to 1 by util_init_pipe_vertex_state */
+ struct pipe_screen *screen; /* screen whose vertex_state_destroy is called to destroy this object */
+
+ /* The following structure is used as a key for util_vertex_state_cache
+ * to deduplicate identical state objects and thus enable more
+ * opportunities for draw merging.
+ */
+ struct {
+ struct pipe_resource *indexbuf; /* index buffer reference */
+ struct pipe_vertex_buffer vbuffer; /* vertex buffer reference */
+ unsigned num_elements; /* number of valid entries in elements[]; asserted equal to the popcount of full_velem_mask at init */
+ struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; /* only the first num_elements entries are filled in at init */
+ uint32_t full_velem_mask; /* the position of the i-th set bit is the logical index of elements[i] */
+ } input;
+};
struct pipe_draw_indirect_info
{
};
/**
+ * Draw vertex state description. It's translated to pipe_draw_info as follows:
+ * - mode comes from this structure
+ * - index_size is 4
+ * - instance_count is 1
+ * - index.resource comes from pipe_vertex_state
+ * - everything else is 0
+ *
+ * The structure is passed by value to pipe_context::draw_vertex_state.
+ * Both compiler branches below exist to keep sizeof(mode) == 1.
+ */
+struct pipe_draw_vertex_state_info {
+#if defined(__GNUC__)
+ /* sizeof(mode) == 1 because it's a packed enum. */
+ enum pipe_prim_type mode; /**< the mode of the primitive */
+#else
+ /* sizeof(mode) == 1 is required by draw merging in u_threaded_context.
+ * Other compilers get an explicit 1-byte integer instead of the enum.
+ */
+ uint8_t mode; /**< the mode of the primitive */
+#endif
+ bool take_vertex_state_ownership; /**< for skipping reference counting */
+};
+
+/**
* Information to describe a draw_vbo call.
*/
struct pipe_draw_info