From e78d7fe7d55370a5b6656027e22acd15b0bab817 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 11 Aug 2021 23:32:38 -0400 Subject: [PATCH] mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead Acked-By: Mike Blumenkrantz Part-of: --- src/mesa/main/dd.h | 18 ++++++ src/mesa/main/dlist.c | 13 +++- src/mesa/state_tracker/st_atom.c | 24 +++---- src/mesa/state_tracker/st_atom.h | 7 ++ src/mesa/state_tracker/st_atom_array.c | 37 +++++++++++ src/mesa/state_tracker/st_cb_feedback.c | 2 +- src/mesa/state_tracker/st_context.c | 2 +- src/mesa/state_tracker/st_draw.c | 61 +++++++++++++++++- src/mesa/state_tracker/st_draw.h | 3 +- src/mesa/vbo/vbo_save.h | 8 +++ src/mesa/vbo/vbo_save_api.c | 19 ++++++ src/mesa/vbo/vbo_save_draw.c | 111 ++++++++++++++++++++++++++++++++ 12 files changed, 287 insertions(+), 18 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 78fa4c4..dd24658 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -60,12 +60,17 @@ struct gl_texture_image; struct gl_texture_object; struct gl_memory_info; struct gl_transform_feedback_object; +struct gl_vertex_array_object; struct ati_fragment_shader; struct util_queue_monitoring; struct _mesa_prim; struct _mesa_index_buffer; struct pipe_draw_info; struct pipe_draw_start_count_bias; +struct pipe_vertex_state; +struct pipe_draw_vertex_state_info; +struct pipe_vertex_buffer; +struct pipe_vertex_element; /* GL_ARB_vertex_buffer_object */ /* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return @@ -641,8 +646,21 @@ struct dd_function_table { void (*DrawTransformFeedback)(struct gl_context *ctx, GLenum mode, unsigned num_instances, unsigned stream, struct gl_transform_feedback_object *tfb_vertcount); + + void (*DrawGalliumVertexState)(struct gl_context *ctx, + struct pipe_vertex_state *state, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + const uint8_t *mode, + unsigned num_draws, + 
bool per_vertex_edgeflags); /*@}*/ + struct pipe_vertex_state * + (*CreateGalliumVertexState)(struct gl_context *ctx, + const struct gl_vertex_array_object *vao, + struct gl_buffer_object *indexbuf, + uint32_t enabled_attribs); /** * \name State-changing functions. diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index f53045a..2ace229 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -73,7 +73,7 @@ #include "vbo/vbo_util.h" #include "vbo/vbo_save.h" #include "util/format_r11g11b10f.h" - +#include "util/u_inlines.h" #include "util/u_memory.h" #define USE_BITMAP_ATLAS 1 @@ -797,8 +797,15 @@ void mesa_print_display_list(GLuint list); static void vbo_destroy_vertex_list(struct gl_context *ctx, struct vbo_save_vertex_list *node) { - for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm) - _mesa_reference_vao(ctx, &node->VAO[vpm], NULL); + for (gl_vertex_processing_mode mode = VP_MODE_FF; mode < VP_MODE_MAX; ++mode) { + _mesa_reference_vao(ctx, &node->VAO[mode], NULL); + if (node->merged.gallium.private_refcount[mode]) { + assert(node->merged.gallium.private_refcount[mode] > 0); + p_atomic_add(&node->merged.gallium.state[mode]->reference.count, + -node->merged.gallium.private_refcount[mode]); + } + pipe_vertex_state_reference(&node->merged.gallium.state[mode], NULL); + } if (node->merged.mode) { free(node->merged.mode); diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 8ba5713..de9369e 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -140,31 +140,33 @@ static void check_program_state( struct st_context *st ) st->dirty |= dirty; } -static void check_attrib_edgeflag(struct st_context *st) +void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags) { - GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled; - struct gl_program *vp = st->ctx->VertexProgram._Current; - - edgeflags_enabled = st->ctx->Polygon.FrontMode != 
GL_FILL || - st->ctx->Polygon.BackMode != GL_FILL; - - vertdata_edgeflags = edgeflags_enabled && - _mesa_draw_edge_flag_array_enabled(st->ctx); + bool edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL || + st->ctx->Polygon.BackMode != GL_FILL; + bool vertdata_edgeflags = edgeflags_enabled && per_vertex_edgeflags; if (vertdata_edgeflags != st->vertdata_edgeflags) { st->vertdata_edgeflags = vertdata_edgeflags; + + struct gl_program *vp = st->ctx->VertexProgram._Current; if (vp) st->dirty |= ST_NEW_VERTEX_PROGRAM(st, st_program(vp)); } - edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags && - !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0]; + bool edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags && + !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0]; if (edgeflag_culls_prims != st->edgeflag_culls_prims) { st->edgeflag_culls_prims = edgeflag_culls_prims; st->dirty |= ST_NEW_RASTERIZER; } } +static void check_attrib_edgeflag(struct st_context *st) +{ + st_update_edgeflags(st, _mesa_draw_edge_flag_array_enabled(st->ctx)); +} + /*********************************************************************** * Update all derived state: diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index a9806ba..bf2e80b 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -58,6 +58,7 @@ enum st_pipeline { void st_init_atoms( struct st_context *st ); void st_destroy_atoms( struct st_context *st ); void st_validate_state( struct st_context *st, enum st_pipeline pipeline ); +void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags); void st_setup_arrays(struct st_context *st, @@ -74,6 +75,12 @@ st_setup_current_user(struct st_context *st, struct cso_velems_state *velements, struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); +struct pipe_vertex_state * +st_create_gallium_vertex_state(struct gl_context *ctx, + const struct gl_vertex_array_object *vao, + struct 
gl_buffer_object *indexbuf, + uint32_t enabled_attribs); + /* Define ST_NEW_xxx_INDEX */ enum { #define ST_STATE(FLAG, st_update) FLAG##_INDEX, diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index a3eb8e8..8b5fd10 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -322,3 +322,40 @@ st_update_array(struct st_context *st) vbuffer); st->last_num_vbuffers = num_vbuffers; } + +struct pipe_vertex_state * +st_create_gallium_vertex_state(struct gl_context *ctx, + const struct gl_vertex_array_object *vao, + struct gl_buffer_object *indexbuf, + uint32_t enabled_attribs) +{ + struct st_context *st = st_context(ctx); + const GLbitfield inputs_read = enabled_attribs; + const GLbitfield dual_slot_inputs = 0; /* always zero */ + struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers = 0; + struct cso_velems_state velements; + bool uses_user_vertex_buffers; + + setup_arrays(st, vao, dual_slot_inputs, inputs_read, 0, inputs_read, 0, + &velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers); + + if (num_vbuffers != 1 || uses_user_vertex_buffers) { + assert(!"this should never happen with display lists"); + return NULL; + } + + velements.count = util_bitcount(inputs_read); + + struct pipe_screen *screen = st->screen; + struct pipe_vertex_state *state = + screen->create_vertex_state(screen, &vbuffer[0], velements.velems, + velements.count, + indexbuf ? 
+ st_buffer_object(indexbuf)->buffer : NULL, + enabled_attribs); + + for (unsigned i = 0; i < num_vbuffers; i++) + pipe_vertex_buffer_unreference(&vbuffer[i]); + return state; +} diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index b9dbed8..e3157f4 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -285,7 +285,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode ) if (newMode == GL_RENDER) { /* restore normal VBO draw function */ - st_init_draw_functions(&ctx->Driver); + st_init_draw_functions(st->screen, &ctx->Driver); } else if (newMode == GL_SELECT) { if (!st->selection_stage) diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 1c53045..aac1bd6 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -960,7 +960,7 @@ st_init_driver_functions(struct pipe_screen *screen, { _mesa_init_sampler_object_functions(functions); - st_init_draw_functions(functions); + st_init_draw_functions(screen, functions); st_init_blit_functions(functions); st_init_bufferobject_functions(screen, functions); st_init_clear_functions(functions); diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 83f429e..bfe4437 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -309,14 +309,73 @@ st_draw_transform_feedback(struct gl_context *ctx, GLenum mode, cso_draw_vbo(st->cso_context, &info, 0, &indirect, draw); } +static void +st_draw_gallium_vertex_state(struct gl_context *ctx, + struct pipe_vertex_state *state, + struct pipe_draw_vertex_state_info info, + const struct pipe_draw_start_count_bias *draws, + const uint8_t *mode, + unsigned num_draws, + bool per_vertex_edgeflags) +{ + struct st_context *st = st_context(ctx); + bool old_vertdata_edgeflags = st->vertdata_edgeflags; + + /* We don't flag any other states to make st_validate_state update 
edge + * flags, so we need to update them here. + */ + st_update_edgeflags(st, per_vertex_edgeflags); + + prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK_NO_VARRAYS, + ST_PIPELINE_RENDER_NO_VARRAYS); + + struct pipe_context *pipe = st->pipe; + uint32_t velem_mask = ctx->VertexProgram._Current->info.inputs_read; + + if (!mode) { + pipe->draw_vertex_state(pipe, state, velem_mask, info, draws, num_draws); + } else { + /* Find consecutive draws where mode doesn't vary. */ + for (unsigned i = 0, first = 0; i <= num_draws; i++) { + if (i == num_draws || mode[i] != mode[first]) { + unsigned current_num_draws = i - first; + + /* Increase refcount to be able to use take_vertex_state_ownership + * with all draws. + */ + if (i != num_draws && info.take_vertex_state_ownership) + p_atomic_inc(&state->reference.count); + + info.mode = mode[first]; + pipe->draw_vertex_state(pipe, state, velem_mask, info, &draws[first], + current_num_draws); + first = i; + } + } + } + + /* If per-vertex edge flags are different than the non-display-list state, + * just flag ST_NEW_VERTEX_ARRAYS, which will also completely revalidate + * edge flags in st_validate_state. 
+ */ + if (st->vertdata_edgeflags != old_vertdata_edgeflags) + st->dirty |= ST_NEW_VERTEX_ARRAYS; +} + void -st_init_draw_functions(struct dd_function_table *functions) +st_init_draw_functions(struct pipe_screen *screen, + struct dd_function_table *functions) { functions->Draw = NULL; functions->DrawGallium = st_draw_gallium; functions->DrawGalliumMultiMode = st_draw_gallium_multimode; functions->DrawIndirect = st_indirect_draw_vbo; functions->DrawTransformFeedback = st_draw_transform_feedback; + + if (screen->get_param(screen, PIPE_CAP_DRAW_VERTEX_STATE)) { + functions->DrawGalliumVertexState = st_draw_gallium_vertex_state; + functions->CreateGalliumVertexState = st_create_gallium_vertex_state; + } } diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index 857e769..d472d1d 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -41,7 +41,8 @@ struct _mesa_prim; struct gl_context; struct st_context; -void st_init_draw_functions(struct dd_function_table *functions); +void st_init_draw_functions(struct pipe_screen *screen, + struct dd_function_table *functions); void st_destroy_draw( struct st_context *st ); diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index 30f9cbf..96ed597 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -64,6 +64,14 @@ struct vbo_save_vertex_list { struct pipe_draw_start_count_bias start_count; }; unsigned num_draws; + + struct { + struct gl_context *ctx; + struct pipe_vertex_state *state[VP_MODE_MAX]; + int private_refcount[VP_MODE_MAX]; + GLbitfield enabled_attribs[VP_MODE_MAX]; + struct pipe_draw_vertex_state_info info; + } gallium; } merged; /* Cold: used during construction or to handle egde-cases */ diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index de4a6fe..7f2b1d4 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -900,6 +900,25 @@ end: _mesa_reference_vao(ctx, &node->VAO[vpm], 
save->VAO[vpm]); } + /* Prepare for DrawGalliumVertexState */ + if (node->merged.num_draws && ctx->Driver.DrawGalliumVertexState) { + for (unsigned i = 0; i < VP_MODE_MAX; i++) { + uint32_t enabled_attribs = _vbo_get_vao_filter(i) & + node->VAO[i]->_EnabledWithMapMode; + + node->merged.gallium.state[i] = + ctx->Driver.CreateGalliumVertexState(ctx, node->VAO[i], + node->cold->ib.obj, + enabled_attribs); + node->merged.gallium.private_refcount[i] = 0; + node->merged.gallium.enabled_attribs[i] = enabled_attribs; + } + + node->merged.gallium.ctx = ctx; + node->merged.gallium.info.mode = node->merged.info.mode; + node->merged.gallium.info.take_vertex_state_ownership = false; + assert(node->merged.info.index_size == 4); + } /* Deal with GL_COMPILE_AND_EXECUTE: */ diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index 11572a2..50d4896 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -180,6 +180,114 @@ vbo_save_playback_vertex_list_loopback(struct gl_context *ctx, void *data) loopback_vertex_list(ctx, node); } +enum vbo_save_status { + DONE, + USE_SLOW_PATH, +}; + +static enum vbo_save_status +vbo_save_playback_vertex_list_gallium(struct gl_context *ctx, + const struct vbo_save_vertex_list *node, + bool copy_to_current) +{ + /* Don't use this if selection or feedback mode is enabled. st/mesa can't + * handle it. + */ + if (!ctx->Driver.DrawGalliumVertexState || ctx->RenderMode != GL_RENDER) + return USE_SLOW_PATH; + + const gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode; + + /* This sets which vertex arrays are enabled, which determines + * which attribs have stride = 0 and whether edge flags are enabled. + */ + const GLbitfield enabled = node->merged.gallium.enabled_attribs[mode]; + ctx->Array._DrawVAOEnabledAttribs = enabled; + _mesa_set_varying_vp_inputs(ctx, enabled); + + if (ctx->NewState) + _mesa_update_state(ctx); + + /* Use the slow path when there are vertex inputs without vertex + * elements. 
This happens with zero-stride attribs and non-fixed-func + * shaders. + * + * Dual-slot inputs are also unsupported because the higher slot is + * always missing in vertex elements. + * + * TODO: Add support for zero-stride attribs. + */ + struct gl_program *vp = ctx->VertexProgram._Current; + + if (vp->info.inputs_read & ~enabled || vp->DualSlotInputs) + return USE_SLOW_PATH; + + struct pipe_vertex_state *state = node->merged.gallium.state[mode]; + struct pipe_draw_vertex_state_info info = node->merged.gallium.info; + + /* Return precomputed GL errors such as invalid shaders. */ + if (!ctx->ValidPrimMask) { + _mesa_error(ctx, ctx->DrawGLError, "glCallList"); + return DONE; + } + + if (node->merged.gallium.ctx == ctx) { + /* This mechanism allows passing references to the driver without + * using atomics to increase the reference count. + * + * This private refcount can be decremented without atomics but only + * one context (ctx above) can use this counter (so that it's only + * used by 1 thread). + * + * This number is atomically added to reference.count at + * initialization. If it's never used, the same number is atomically + * subtracted from reference.count before destruction. If this number + * is decremented, we can pass one reference to the driver without + * touching reference.count with atomics. At destruction we only + * subtract the number of references we have not returned. This can + * possibly turn a million atomic increments into 1 add and 1 subtract + * atomic op over the whole lifetime of an app. + */ + int * const private_refcount = (int*)&node->merged.gallium.private_refcount[mode]; + assert(*private_refcount >= 0); + + if (unlikely(*private_refcount == 0)) { + /* pipe_vertex_state can be reused through util_vertex_state_cache, + * and there can be many display lists over-incrementing this number, + * causing it to overflow. 
+ * + * Guess that the same state can never be used by N=500000 display + * lists, so one display list can only increment it by + * INT_MAX / N. + */ + const int add_refs = INT_MAX / 500000; + p_atomic_add(&state->reference.count, add_refs); + *private_refcount = add_refs; + } + + (*private_refcount)--; + info.take_vertex_state_ownership = true; + } + + /* Fast path using a pre-built gallium vertex buffer state. */ + if (node->merged.mode || node->merged.num_draws > 1) { + ctx->Driver.DrawGalliumVertexState(ctx, state, info, + node->merged.start_counts, + node->merged.mode, + node->merged.num_draws, + enabled & VERT_BIT_EDGEFLAG); + } else if (node->merged.num_draws) { + ctx->Driver.DrawGalliumVertexState(ctx, state, info, + &node->merged.start_count, + NULL, 1, + enabled & VERT_BIT_EDGEFLAG); + } + + if (copy_to_current) + playback_copy_to_current(ctx, node); + return DONE; +} + /** * Execute the buffer and save copied verts. * This is called from the display list code when executing @@ -202,6 +310,9 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data, bool copy_to_c return; } + if (vbo_save_playback_vertex_list_gallium(ctx, node, copy_to_current) == DONE) + return; + bind_vertex_list(ctx, node); /* Need that at least one time. */ -- 2.7.4