dlist: implement vertices deduplication
authorPierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Fri, 16 Jul 2021 12:16:43 +0000 (14:16 +0200)
committerPierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Mon, 2 Aug 2021 09:13:20 +0000 (11:13 +0200)
Now that the vertices attributes are in RAM, we can easily
compute their hash and compare to earlier vertices (in the
same list so they have compatible vertex_size).

We can't do that for list that will be executed using
loopback because the replay code ignore the index buffer.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11927>

src/mesa/vbo/vbo_save_api.c

index 7a56b5e..f36fa41 100644 (file)
@@ -83,6 +83,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/varray.h"
 #include "util/bitscan.h"
 #include "util/u_memory.h"
+#include "util/hash_table.h"
 
 #include "gallium/include/pipe/p_state.h"
 
@@ -448,6 +449,85 @@ realloc_storage(struct gl_context *ctx, int prim_count, int vertex_count)
    }
 }
 
+struct vertex_key {
+   unsigned vertex_size;
+   fi_type *vertex_attributes;
+};
+
+static uint32_t _hash_vertex_key(const void *key)
+{
+   struct vertex_key *k = (struct vertex_key*)key;
+   unsigned sz = k->vertex_size;
+   assert(sz);
+   return _mesa_hash_data(k->vertex_attributes, sz * sizeof(float));
+}
+
+static bool _compare_vertex_key(const void *key1, const void *key2)
+{
+   struct vertex_key *k1 = (struct vertex_key*)key1;
+   struct vertex_key *k2 = (struct vertex_key*)key2;
+   /* All the compared vertices are going to be drawn with the same VAO,
+    * so we can compare the attributes. */
+   assert (k1->vertex_size == k2->vertex_size);
+   return memcmp(k1->vertex_attributes,
+                 k2->vertex_attributes,
+                 k1->vertex_size * sizeof(float)) == 0;
+}
+
+static void _free_entry(struct hash_entry *entry)
+{
+   free((void*)entry->key);
+}
+
+/* Add vertex to the vertex buffer and return its index. If this vertex is a duplicate
+ * of an existing vertex, return the original index instead.
+ */
+static uint32_t
+add_vertex(struct vbo_save_context *save, struct hash_table *hash_to_index,
+           uint32_t index, uint32_t offset_in_bytes, uint32_t base_index, fi_type *new_buffer,
+           uint32_t *max_index)
+{
+   /* If vertex deduplication is disabled return the original index. */
+   if (!hash_to_index)
+      return index;
+
+   /* Apply the offset into buffer_in_ram ... */
+   fi_type *vert = save->vertex_store->buffer_in_ram + offset_in_bytes / sizeof(float);
+   /* ... and cancel the start_offset trick.
+    * This way we get the correct offset in all cases (= start_offset being 0 or not).
+    */
+   vert += save->vertex_size * (index - base_index);
+
+   struct vertex_key *key = malloc(sizeof(struct vertex_key));
+   key->vertex_size = save->vertex_size;
+   key->vertex_attributes = vert;
+
+   struct hash_entry *entry = _mesa_hash_table_search(hash_to_index, key);
+   if (entry) {
+      free(key);
+      /* We found an existing vertex with the same hash, return its index. */
+      return (uintptr_t) entry->data;
+   } else {
+      /* This is a new vertex. Determine a new index and copy its attributes to the vertex
+       * buffer. Note that 'new_buffer' is created at each list compilation so we write vertices
+       * starting at index 0.
+       */
+      uint32_t n = _mesa_hash_table_num_entries(hash_to_index);
+      *max_index = MAX2(n + base_index, *max_index);
+
+      memcpy(&new_buffer[save->vertex_size * n],
+             vert,
+             save->vertex_size * sizeof(fi_type));
+
+      _mesa_hash_table_insert(hash_to_index, key, (void*)(uintptr_t)(n + base_index));
+
+      /* The index buffer is shared between list compilations, so add the base index to get
+       * the final index.
+       */
+      return n + base_index;
+   }
+}
+
 
 /**
  * Insert the active immediate struct onto the display list currently
@@ -614,6 +694,18 @@ compile_vertex_list(struct gl_context *ctx)
    struct _mesa_prim *merged_prims = NULL;
 
    int idx = 0;
+   struct hash_table *vertex_to_index = NULL;
+   fi_type *temp_vertices_buffer = NULL;
+
+   /* The loopback replay code doesn't use the index buffer, so we can't
+    * dedup vertices in this case.
+    */
+   if (!ctx->ListState.Current.UseLoopback) {
+      vertex_to_index = _mesa_hash_table_create(NULL, _hash_vertex_key, _compare_vertex_key);
+      temp_vertices_buffer = malloc(save->vertex_store->buffer_in_ram_size);
+   }
+
+   uint32_t max_index = 0;
 
    int last_valid_prim = -1;
    /* Construct indices array. */
@@ -647,13 +739,17 @@ compile_vertex_list(struct gl_context *ctx)
          unsigned tri_count = merged_prims[last_valid_prim].count - 2;
 
          indices[idx] = indices[idx - 1];
-         indices[idx + 1] = original_prims[i].start;
+         indices[idx + 1] = add_vertex(save, vertex_to_index, original_prims[i].start,
+                                       original_buffer_offset, start_offset,
+                                       temp_vertices_buffer, &max_index);
          idx += 2;
          merged_prims[last_valid_prim].count += 2;
 
          if (tri_count % 2) {
             /* Add another index to preserve winding order */
-            indices[idx++] = original_prims[i].start;
+            indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start,
+                                        original_buffer_offset, start_offset,
+                                        temp_vertices_buffer, &max_index);
             merged_prims[last_valid_prim].count++;
          }
       }
@@ -670,10 +766,14 @@ compile_vertex_list(struct gl_context *ctx)
             (original_prims[i + 1].mode == GL_LINE_STRIP ||
              original_prims[i + 1].mode == GL_LINES)))) {
          for (unsigned j = 0; j < vertex_count; j++) {
-            indices[idx++] = original_prims[i].start + j;
+            indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j,
+                                        original_buffer_offset, start_offset,
+                                        temp_vertices_buffer, &max_index);
             /* Repeat all but the first/last indices. */
             if (j && j != vertex_count - 1) {
-               indices[idx++] = original_prims[i].start + j;
+               indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j,
+                                           original_buffer_offset, start_offset,
+                                           temp_vertices_buffer, &max_index);
             }
          }
       } else {
@@ -681,7 +781,9 @@ compile_vertex_list(struct gl_context *ctx)
          mode = original_prims[i].mode;
 
          for (unsigned j = 0; j < vertex_count; j++) {
-            indices[idx++] = original_prims[i].start + j;
+            indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j,
+                                        original_buffer_offset, start_offset,
+                                        temp_vertices_buffer, &max_index);
          }
       }
 
@@ -712,11 +814,11 @@ compile_vertex_list(struct gl_context *ctx)
       _mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
       save->previous_ib = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
       bool success = ctx->Driver.BufferData(ctx,
-                                         GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                         MAX2(VBO_SAVE_INDEX_SIZE, idx) * sizeof(uint32_t),
-                                         NULL,
-                                         GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
-                                         save->previous_ib);
+                                            GL_ELEMENT_ARRAY_BUFFER_ARB,
+                                            MAX2(VBO_SAVE_INDEX_SIZE, idx) * sizeof(uint32_t),
+                                            NULL,
+                                            GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
+                                            save->previous_ib);
       if (!success) {
          _mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
@@ -737,11 +839,23 @@ compile_vertex_list(struct gl_context *ctx)
       node->cold->prim_count = 0;
    }
 
-   ctx->Driver.BufferSubData(ctx,
-                             original_buffer_offset,
-                             idx * save->vertex_size * sizeof(fi_type),
-                             &save->vertex_store->buffer_in_ram[original_buffer_offset / sizeof(float)],
-                             save->vertex_store->bufferobj);
+
+  if (vertex_to_index) {
+      ctx->Driver.BufferSubData(ctx,
+                                original_buffer_offset,
+                                (max_index - start_offset + 1) * save->vertex_size * sizeof(fi_type),
+                                temp_vertices_buffer,
+                                save->vertex_store->bufferobj);
+
+      _mesa_hash_table_destroy(vertex_to_index, _free_entry);
+      free(temp_vertices_buffer);
+   } else {
+      ctx->Driver.BufferSubData(ctx,
+                                original_buffer_offset,
+                                idx * save->vertex_size * sizeof(fi_type),
+                                &save->vertex_store->buffer_in_ram[original_buffer_offset / sizeof(float)],
+                                save->vertex_store->bufferobj);
+   }
 
    /* Prepare for DrawGallium */
    memset(&node->merged.info, 0, sizeof(struct pipe_draw_info));