From de70c0cf88bd46a5e5fc7f8c38a64733a7c30042 Mon Sep 17 00:00:00 2001
From: Mike Blumenkrantz
Date: Fri, 7 Apr 2023 11:24:36 -0400
Subject: [PATCH] lavapipe: implement inline variant caching

inlining is great, but it's less great if a new variant must be created
for every draw

to avoid this, cache inlined variants for reuse

Reviewed-by: Brian Paul
Part-of: 
---
 src/gallium/frontends/lavapipe/lvp_execute.c  | 55 ++++++++++++++++++---------
 src/gallium/frontends/lavapipe/lvp_pipeline.c | 27 ++++++++++++-
 src/gallium/frontends/lavapipe/lvp_private.h  |  7 ++++
 3 files changed, 69 insertions(+), 20 deletions(-)

diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index 690541e..bc8c7d6 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -282,18 +282,18 @@ update_pcbuf(struct rendering_state *state, enum pipe_shader_type pstage)
 static void
 update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type sh, bool pcbuf_dirty, bool constbuf_dirty)
 {
-   uint32_t inline_uniforms[MAX_INLINABLE_UNIFORMS];
    unsigned stage = tgsi_processor_to_shader_stage(sh);
    state->inlines_dirty[sh] = false;
    struct lvp_shader *shader = state->shaders[stage];
    if (!shader || !shader->inlines.can_inline)
       return;
+   struct lvp_inline_variant v;
+   v.mask = shader->inlines.can_inline;
    /* these buffers have already been flushed in llvmpipe, so they're safe to read */
    nir_shader *base_nir = shader->pipeline_nir->nir;
    if (stage == MESA_SHADER_TESS_EVAL && state->tess_ccw)
       base_nir = shader->tess_ccw->nir;
-   nir_shader *nir = nir_shader_clone(shader->pipeline_nir->nir, base_nir);
-   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_function_impl *impl = nir_shader_get_entrypoint(base_nir);
    unsigned ssa_alloc = impl->ssa_alloc;
    unsigned count = shader->inlines.count[0];
    if (count && pcbuf_dirty) {
@@ -301,20 +301,21 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
       for (unsigned i = 0; i < count; i++) {
          unsigned offset = shader->inlines.uniform_offsets[0][i];
          if (offset < push_size) {
-            memcpy(&inline_uniforms[i], &state->push_constants[offset], sizeof(uint32_t));
+            memcpy(&v.vals[0][i], &state->push_constants[offset], sizeof(uint32_t));
          } else {
             for (unsigned i = 0; i < state->uniform_blocks[sh].count; i++) {
                if (offset < push_size + state->uniform_blocks[sh].size[i]) {
                   unsigned ubo_offset = offset - push_size;
                   uint8_t *block = state->uniform_blocks[sh].block[i];
-                  memcpy(&inline_uniforms[i], &block[ubo_offset], sizeof(uint32_t));
+                  memcpy(&v.vals[0][i], &block[ubo_offset], sizeof(uint32_t));
                   break;
                }
                push_size += state->uniform_blocks[sh].size[i];
             }
          }
       }
-      NIR_PASS_V(nir, lvp_inline_uniforms, shader, inline_uniforms, 0);
+      for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++)
+         v.vals[0][i] = 0;
    }
    if (constbuf_dirty) {
       struct pipe_box box = {0};
@@ -331,24 +332,42 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
          uint8_t *map = state->pctx->buffer_map(state->pctx, pres, 0, PIPE_MAP_READ, &box, &xfer);
          for (unsigned i = 0; i < count; i++) {
             unsigned offset = shader->inlines.uniform_offsets[slot][i];
-            memcpy(&inline_uniforms[i], map + offset, sizeof(uint32_t));
+            memcpy(&v.vals[slot][i], map + offset, sizeof(uint32_t));
          }
          state->pctx->buffer_unmap(state->pctx, xfer);
-         NIR_PASS_V(nir, lvp_inline_uniforms, shader, inline_uniforms, slot);
+         for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++)
+            v.vals[slot][i] = 0;
       }
    }
-   lvp_shader_optimize(nir);
-   impl = nir_shader_get_entrypoint(nir);
+   bool found = false;
+   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(&shader->inlines.variants, v.mask, &v, &found);
    void *shader_state;
-   if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 &&
-       !shader->inlines.must_inline) {
-      /* not enough change; don't inline further */
-      shader->inlines.can_inline = 0;
-      ralloc_free(nir);
-      shader->shader_cso = lvp_shader_compile(state->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir));
-      shader_state = shader->shader_cso;
+   if (found) {
+      const struct lvp_inline_variant *variant = entry->key;
+      shader_state = variant->cso;
    } else {
-      shader_state = lvp_shader_compile(state->device, shader, nir);
+      nir_shader *nir = nir_shader_clone(NULL, base_nir);
+      NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[0], 0);
+      if (constbuf_dirty) {
+         u_foreach_bit(slot, shader->inlines.can_inline)
+            NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[slot], slot);
+      }
+      lvp_shader_optimize(nir);
+      impl = nir_shader_get_entrypoint(nir);
+      if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 &&
+          !shader->inlines.must_inline) {
+         /* not enough change; don't inline further */
+         shader->inlines.can_inline = 0;
+         ralloc_free(nir);
+         shader->shader_cso = lvp_shader_compile(state->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir));
+         _mesa_set_remove(&shader->inlines.variants, entry);
+         shader_state = shader->shader_cso;
+      } else {
+         shader_state = lvp_shader_compile(state->device, shader, nir);
+         struct lvp_inline_variant *variant = mem_dup(&v, sizeof(v));
+         variant->cso = shader_state;
+         entry->key = variant;
+      }
    }
    switch (sh) {
    case MESA_SHADER_VERTEX:
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index b86429e..ffd7124 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -56,6 +56,12 @@ shader_destroy(struct lvp_device *device, struct lvp_shader *shader)
       device->queue.ctx->delete_fs_state,
       device->queue.ctx->delete_compute_state,
    };
+   set_foreach(&shader->inlines.variants, entry) {
+      struct lvp_inline_variant *variant = (void*)entry->key;
+      destroy[stage](device->queue.ctx, variant->cso);
+      free(variant);
+   }
+   ralloc_free(shader->inlines.variants.table);
    if (shader->shader_cso)
       destroy[stage](device->queue.ctx, shader->shader_cso);
    if (shader->tess_ccw_cso)
@@ -439,6 +445,18 @@ compile_spirv(struct lvp_device *pdevice, const VkPipelineShaderStageCreateInfo
    return result;
 }
 
+static bool
+inline_variant_equals(const void *a, const void *b)
+{
+   const struct lvp_inline_variant *av = a, *bv = b;
+   assert(av->mask == bv->mask);
+   u_foreach_bit(slot, av->mask) {
+      if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot])))
+         return false;
+   }
+   return true;
+}
+
 static void
 lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader *shader, struct lvp_pipeline_layout *layout)
 {
@@ -528,6 +546,8 @@ lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader
    if (impl->ssa_alloc > 100) //skip for small shaders
       shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir);
    shader->pipeline_nir = create_pipeline_nir(nir);
+   if (shader->inlines.can_inline)
+      _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals);
 }
 
 static VkResult
@@ -782,6 +802,8 @@ copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src)
    dst->tess_ccw = NULL; //this gets handled later
    assert(!dst->shader_cso);
    assert(!dst->tess_ccw_cso);
+   if (src->inlines.can_inline)
+      _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals);
 }
 
 static VkResult
@@ -833,9 +855,10 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
       pipeline->line_smooth = p->line_smooth;
       pipeline->disable_multisample = p->disable_multisample;
       pipeline->line_rectangular = p->line_rectangular;
-      pipeline->last_vertex = p->last_vertex;
-      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++)
+      memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
+      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
          copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]);
+      }
    }
    if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       pipeline->force_min_sample = p->force_min_sample;
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 122d50d..613e78a 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -432,6 +432,12 @@ lvp_pipeline_nir_ref(struct lvp_pipeline_nir **dst, struct lvp_pipeline_nir *src
    *dst = src;
 }
 
+struct lvp_inline_variant {
+   uint32_t mask;
+   uint32_t vals[PIPE_MAX_CONSTANT_BUFFERS][MAX_INLINABLE_UNIFORMS];
+   void *cso;
+};
+
 struct lvp_shader {
    struct vk_object_base base;
    struct lvp_pipeline_layout *layout;
@@ -445,6 +451,7 @@ struct lvp_shader {
       uint8_t count[PIPE_MAX_CONSTANT_BUFFERS];
       bool must_inline;
       uint32_t can_inline; //bitmask
+      struct set variants;
    } inlines;
    struct pipe_stream_output_info stream_output;
    struct blob blob; //preserved for GetShaderBinaryDataEXT
-- 
2.7.4
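
Note (not part of the patch): the patch keys each compiled variant on the values of the uniforms that were inlined into it, using the can_inline bitmask as a precomputed hash and inline_variant_equals() to compare the per-slot values. The following is a minimal standalone C sketch of that caching idea only; it does not use Mesa's util/set API, and every name in it (variant_cache, compile_with_inlined_uniforms, get_variant) is a hypothetical illustration, not code from the tree.

/* Standalone sketch -- hypothetical names, not Mesa code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_INLINED 8

struct variant {
   uint32_t vals[MAX_INLINED];  /* inlined uniform values: the cache key */
   void *cso;                   /* "compiled shader" built for those values */
   struct variant *next;
};

struct variant_cache {
   struct variant *head;
};

/* stand-in for an expensive compile with the constants folded in */
static void *
compile_with_inlined_uniforms(const uint32_t *vals)
{
   (void)vals;
   return malloc(1);
}

/* Return the variant for these uniform values, compiling only on a miss. */
static void *
get_variant(struct variant_cache *cache, const uint32_t *vals)
{
   for (struct variant *v = cache->head; v; v = v->next) {
      if (!memcmp(v->vals, vals, sizeof(v->vals)))
         return v->cso;   /* hit: reuse instead of recompiling every draw */
   }
   struct variant *v = calloc(1, sizeof(*v));
   memcpy(v->vals, vals, sizeof(v->vals));
   v->cso = compile_with_inlined_uniforms(vals);
   v->next = cache->head;
   cache->head = v;
   return v->cso;
}

int main(void)
{
   struct variant_cache cache = {0};
   uint32_t draw1[MAX_INLINED] = {1, 2, 3};
   uint32_t draw2[MAX_INLINED] = {1, 2, 3};  /* same values as draw1 */
   printf("reused: %s\n",
          get_variant(&cache, draw1) == get_variant(&cache, draw2) ? "yes" : "no");
   return 0;
}

The patch itself stores the variants in a hash set rather than a linear list and frees them in shader_destroy(); the list above is only to keep the illustration short.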