lavapipe: implement inline variant caching
author     Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
           Fri, 7 Apr 2023 15:24:36 +0000 (11:24 -0400)
committer  Marge Bot <emma+marge@anholt.net>
           Tue, 11 Apr 2023 01:37:05 +0000 (01:37 +0000)
Inlining is great, but it's less great if a new variant must be created
for every draw.

To avoid this, cache inlined variants for reuse.

Reviewed-by: Brian Paul <brianp@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22363>
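The cache key is the full set of inlinable uniform values (per constant-buffer slot) plus
the inline mask, so draws that feed the same values reuse the same compiled CSO; unused
value slots are zero-filled so the whole array can be compared with memcmp. Below is a
condensed sketch of the lookup path added to update_inline_shader_state(), not the verbatim
patch: fill_uniform_values() and compile_variant_cso() are hypothetical stand-ins for the
value-gathering loops and the lvp_shader_compile() call shown in the diff.

   /* Condensed sketch of the variant-cache lookup; see the lvp_execute.c hunk for the real code. */
   struct lvp_inline_variant v;
   v.mask = shader->inlines.can_inline;
   fill_uniform_values(&v, state, shader);   /* hypothetical: populates v.vals[][] */

   bool found = false;
   /* The mask doubles as the pre-computed hash; the equality callback does the real work. */
   struct set_entry *entry =
      _mesa_set_search_or_add_pre_hashed(&shader->inlines.variants,
                                         v.mask, &v, &found);
   void *shader_state;
   if (found) {
      /* A variant with identical uniform values already exists: reuse its CSO. */
      shader_state = ((const struct lvp_inline_variant *)entry->key)->cso;
   } else {
      /* First time these values are seen: inline, optimize, compile, then
       * replace the stack key in the set with a heap copy that owns the CSO. */
      shader_state = compile_variant_cso(state, shader, &v);   /* hypothetical */
      struct lvp_inline_variant *variant = mem_dup(&v, sizeof(v));
      variant->cso = shader_state;
      entry->key = variant;
   }

Because every variant of a given shader shares the same mask, all entries in that shader's
set hash identically and inline_variant_equals() (a memcmp over the inlined values) does the
actual discrimination, which keeps the code simple at the cost of hash discrimination within
one shader's set. Cached variants are freed in shader_destroy(), as the lvp_pipeline.c hunk
shows.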

src/gallium/frontends/lavapipe/lvp_execute.c
src/gallium/frontends/lavapipe/lvp_pipeline.c
src/gallium/frontends/lavapipe/lvp_private.h

diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index 690541e..bc8c7d6 100644
@@ -282,18 +282,18 @@ update_pcbuf(struct rendering_state *state, enum pipe_shader_type pstage)
 static void
 update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type sh, bool pcbuf_dirty, bool constbuf_dirty)
 {
-   uint32_t inline_uniforms[MAX_INLINABLE_UNIFORMS];
    unsigned stage = tgsi_processor_to_shader_stage(sh);
    state->inlines_dirty[sh] = false;
    struct lvp_shader *shader = state->shaders[stage];
    if (!shader || !shader->inlines.can_inline)
       return;
+   struct lvp_inline_variant v;
+   v.mask = shader->inlines.can_inline;
    /* these buffers have already been flushed in llvmpipe, so they're safe to read */
    nir_shader *base_nir = shader->pipeline_nir->nir;
    if (stage == MESA_SHADER_TESS_EVAL && state->tess_ccw)
       base_nir = shader->tess_ccw->nir;
-   nir_shader *nir = nir_shader_clone(shader->pipeline_nir->nir, base_nir);
-   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_function_impl *impl = nir_shader_get_entrypoint(base_nir);
    unsigned ssa_alloc = impl->ssa_alloc;
    unsigned count = shader->inlines.count[0];
    if (count && pcbuf_dirty) {
@@ -301,20 +301,21 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
       for (unsigned i = 0; i < count; i++) {
          unsigned offset = shader->inlines.uniform_offsets[0][i];
          if (offset < push_size) {
-            memcpy(&inline_uniforms[i], &state->push_constants[offset], sizeof(uint32_t));
+            memcpy(&v.vals[0][i], &state->push_constants[offset], sizeof(uint32_t));
          } else {
             for (unsigned i = 0; i < state->uniform_blocks[sh].count; i++) {
                if (offset < push_size + state->uniform_blocks[sh].size[i]) {
                   unsigned ubo_offset = offset - push_size;
                   uint8_t *block = state->uniform_blocks[sh].block[i];
-                  memcpy(&inline_uniforms[i], &block[ubo_offset], sizeof(uint32_t));
+                  memcpy(&v.vals[0][i], &block[ubo_offset], sizeof(uint32_t));
                   break;
                }
                push_size += state->uniform_blocks[sh].size[i];
             }
          }
       }
-      NIR_PASS_V(nir, lvp_inline_uniforms, shader, inline_uniforms, 0);
+      for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++)
+         v.vals[0][i] = 0;
    }
    if (constbuf_dirty) {
       struct pipe_box box = {0};
@@ -331,24 +332,42 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
          uint8_t *map = state->pctx->buffer_map(state->pctx, pres, 0, PIPE_MAP_READ, &box, &xfer);
          for (unsigned i = 0; i < count; i++) {
             unsigned offset = shader->inlines.uniform_offsets[slot][i];
-            memcpy(&inline_uniforms[i], map + offset, sizeof(uint32_t));
+            memcpy(&v.vals[slot][i], map + offset, sizeof(uint32_t));
          }
          state->pctx->buffer_unmap(state->pctx, xfer);
-         NIR_PASS_V(nir, lvp_inline_uniforms, shader, inline_uniforms, slot);
+         for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++)
+            v.vals[slot][i] = 0;
       }
    }
-   lvp_shader_optimize(nir);
-   impl = nir_shader_get_entrypoint(nir);
+   bool found = false;
+   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(&shader->inlines.variants, v.mask, &v, &found);
    void *shader_state;
-   if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 &&
-       !shader->inlines.must_inline) {
-      /* not enough change; don't inline further */
-      shader->inlines.can_inline = 0;
-      ralloc_free(nir);
-      shader->shader_cso = lvp_shader_compile(state->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir));
-      shader_state = shader->shader_cso;
+   if (found) {
+      const struct lvp_inline_variant *variant = entry->key;
+      shader_state = variant->cso;
    } else {
-      shader_state = lvp_shader_compile(state->device, shader, nir);
+      nir_shader *nir = nir_shader_clone(NULL, base_nir);
+      NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[0], 0);
+      if (constbuf_dirty) {
+         u_foreach_bit(slot, shader->inlines.can_inline)
+            NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[slot], slot);
+      }
+      lvp_shader_optimize(nir);
+      impl = nir_shader_get_entrypoint(nir);
+      if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 &&
+         !shader->inlines.must_inline) {
+         /* not enough change; don't inline further */
+         shader->inlines.can_inline = 0;
+         ralloc_free(nir);
+         shader->shader_cso = lvp_shader_compile(state->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir));
+         _mesa_set_remove(&shader->inlines.variants, entry);
+         shader_state = shader->shader_cso;
+      } else {
+         shader_state = lvp_shader_compile(state->device, shader, nir);
+         struct lvp_inline_variant *variant = mem_dup(&v, sizeof(v));
+         variant->cso = shader_state;
+         entry->key = variant;
+      }
    }
    switch (sh) {
    case MESA_SHADER_VERTEX:
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index b86429e..ffd7124 100644
@@ -56,6 +56,12 @@ shader_destroy(struct lvp_device *device, struct lvp_shader *shader)
       device->queue.ctx->delete_fs_state,
       device->queue.ctx->delete_compute_state,
    };
+   set_foreach(&shader->inlines.variants, entry) {
+      struct lvp_inline_variant *variant = (void*)entry->key;
+      destroy[stage](device->queue.ctx, variant->cso);
+      free(variant);
+   }
+   ralloc_free(shader->inlines.variants.table);
    if (shader->shader_cso)
       destroy[stage](device->queue.ctx, shader->shader_cso);
    if (shader->tess_ccw_cso)
@@ -439,6 +445,18 @@ compile_spirv(struct lvp_device *pdevice, const VkPipelineShaderStageCreateInfo
    return result;
 }
 
+static bool
+inline_variant_equals(const void *a, const void *b)
+{
+   const struct lvp_inline_variant *av = a, *bv = b;
+   assert(av->mask == bv->mask);
+   u_foreach_bit(slot, av->mask) {
+      if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot])))
+         return false;
+   }
+   return true;
+}
+
 static void
 lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader *shader, struct lvp_pipeline_layout *layout)
 {
@@ -528,6 +546,8 @@ lvp_shader_lower(struct lvp_device *pdevice, nir_shader *nir, struct lvp_shader
    if (impl->ssa_alloc > 100) //skip for small shaders
       shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir);
    shader->pipeline_nir = create_pipeline_nir(nir);
+   if (shader->inlines.can_inline)
+      _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals);
 }
 
 static VkResult
@@ -782,6 +802,8 @@ copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src)
    dst->tess_ccw = NULL; //this gets handled later
    assert(!dst->shader_cso);
    assert(!dst->tess_ccw_cso);
+   if (src->inlines.can_inline)
+      _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals);
 }
 
 static VkResult
@@ -833,9 +855,10 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
             pipeline->line_smooth = p->line_smooth;
             pipeline->disable_multisample = p->disable_multisample;
             pipeline->line_rectangular = p->line_rectangular;
-            pipeline->last_vertex = p->last_vertex;
-            for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++)
+            memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
+            for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
                copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]);
+            }
          }
          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
             pipeline->force_min_sample = p->force_min_sample;
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 122d50d..613e78a 100644
@@ -432,6 +432,12 @@ lvp_pipeline_nir_ref(struct lvp_pipeline_nir **dst, struct lvp_pipeline_nir *src
    *dst = src;
 }
 
+struct lvp_inline_variant {
+   uint32_t mask;
+   uint32_t vals[PIPE_MAX_CONSTANT_BUFFERS][MAX_INLINABLE_UNIFORMS];
+   void *cso;
+};
+
 struct lvp_shader {
    struct vk_object_base base;
    struct lvp_pipeline_layout *layout;
@@ -445,6 +451,7 @@ struct lvp_shader {
       uint8_t count[PIPE_MAX_CONSTANT_BUFFERS];
       bool must_inline;
       uint32_t can_inline; //bitmask
+      struct set variants;
    } inlines;
    struct pipe_stream_output_info stream_output;
    struct blob blob; //preserved for GetShaderBinaryDataEXT