gallium,util: Pull u_indices and u_primconvert back into gallium
[platform/upstream/mesa.git] / src / mesa / vbo / vbo_save_api.c
index 407b13e..a41d518 100644 (file)
@@ -110,19 +110,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/macros.h"
 #include "main/draw_validate.h"
 #include "main/api_arrayelt.h"
-#include "main/vtxfmt.h"
 #include "main/dispatch.h"
 #include "main/state.h"
 #include "main/varray.h"
 #include "util/bitscan.h"
 #include "util/u_memory.h"
 #include "util/hash_table.h"
+#include "gallium/auxiliary/indices/u_indices.h"
+#include "util/u_prim.h"
 
 #include "gallium/include/pipe/p_state.h"
 
-#include "vbo_noop.h"
 #include "vbo_private.h"
-
+#include "api_exec_decl.h"
+#include "api_save.h"
 
 #ifdef ERROR
 #undef ERROR
@@ -140,14 +141,6 @@ _save_EvalCoord1f(GLfloat u);
 static void GLAPIENTRY
 _save_EvalCoord2f(GLfloat u, GLfloat v);
 
-static void
-handle_out_of_memory(struct gl_context *ctx)
-{
-   struct vbo_save_context *save = &vbo_context(ctx)->save;
-   _mesa_noop_vtxfmt_init(ctx, &save->vtxfmt);
-   save->out_of_memory = true;
-}
-
 /*
  * NOTE: Old 'parity' issue is gone, but copying can still be
  * wrong-footed on replay.
@@ -168,24 +161,13 @@ copy_vertices(struct gl_context *ctx,
    assert(save->copied.buffer == NULL);
    save->copied.buffer = malloc(sizeof(fi_type) * sz * prim->count);
 
-   return vbo_copy_vertices(ctx, prim->mode, prim->start, &prim->count,
-                            prim->begin, sz, true, save->copied.buffer, src);
-}
-
-
-static struct vbo_save_vertex_store *
-realloc_vertex_store(struct vbo_save_vertex_store *store, uint32_t vertex_size, int vertex_count)
-{
-   if (!store)
-      store = CALLOC_STRUCT(vbo_save_vertex_store);
-
-   int new_size = MAX2(vertex_count * vertex_size * sizeof(GLfloat), 1024 * 1024);
-   if (new_size > store->buffer_in_ram_size) {
-      store->buffer_in_ram_size = new_size;
-      store->buffer_in_ram = realloc(store->buffer_in_ram, store->buffer_in_ram_size);
+   unsigned r = vbo_copy_vertices(ctx, prim->mode, prim->start, &prim->count,
+                                  prim->begin, sz, true, save->copied.buffer, src);
+   if (!r) {
+      free(save->copied.buffer);
+      save->copied.buffer = NULL;
    }
-
-   return store;
+   return r;
 }
 
 
@@ -196,10 +178,11 @@ realloc_prim_store(struct vbo_save_primitive_store *store, int prim_count)
       store = CALLOC_STRUCT(vbo_save_primitive_store);
 
    uint32_t old_size = store->size;
-   store->size = MAX3(store->size, prim_count, 128);
+   store->size = prim_count;
+   assert (old_size < store->size);
    store->prims = realloc(store->prims, store->size * sizeof(struct _mesa_prim));
    memset(&store->prims[old_size], 0, (store->size - old_size) * sizeof(struct _mesa_prim));
-   store->used = 0;
+
    return store;
 }
 
@@ -409,39 +392,33 @@ update_vao(struct gl_context *ctx,
    _mesa_set_vao_immutable(ctx, *vao);
 }
 
-
 static void wrap_filled_vertex(struct gl_context *ctx);
-static void compile_vertex_list(struct gl_context *ctx);
 
+/* Grow the vertex storage to accommodate vertex_count new vertices */
 static void
-realloc_storage(struct gl_context *ctx, int prim_count, int vertex_count)
+grow_vertex_storage(struct gl_context *ctx, int vertex_count)
 {
    struct vbo_save_context *save = &vbo_context(ctx)->save;
+   assert (save->vertex_store);
+
+   int new_size = (save->vertex_store->used +
+                   vertex_count * save->vertex_size) * sizeof(GLfloat);
 
    /* Limit how much memory we allocate. */
    if (save->prim_store->used > 0 &&
        vertex_count > 0 &&
-       vertex_count * save->vertex_size > VBO_SAVE_BUFFER_SIZE) {
+       new_size > VBO_SAVE_BUFFER_SIZE) {
       wrap_filled_vertex(ctx);
-      vertex_count = VBO_SAVE_BUFFER_SIZE / save->vertex_size;
+      new_size = VBO_SAVE_BUFFER_SIZE;
    }
 
-   if (prim_count > 0 &&
-       prim_count * sizeof(struct _mesa_prim) > VBO_SAVE_BUFFER_SIZE) {
-      if (save->prim_store->used > 0)
-         compile_vertex_list(ctx);
-      prim_count = VBO_SAVE_BUFFER_SIZE / sizeof(struct _mesa_prim);
+   if (new_size > save->vertex_store->buffer_in_ram_size) {
+      save->vertex_store->buffer_in_ram_size = new_size;
+      save->vertex_store->buffer_in_ram = realloc(save->vertex_store->buffer_in_ram,
+                                                  save->vertex_store->buffer_in_ram_size);
+      if (save->vertex_store->buffer_in_ram == NULL)
+         save->out_of_memory = true;
    }
-
-   if (vertex_count >= 0)
-      save->vertex_store = realloc_vertex_store(save->vertex_store, save->vertex_size, vertex_count);
-
-   if (prim_count >= 0)
-      save->prim_store = realloc_prim_store(save->prim_store, prim_count);
-
-   if (save->vertex_store->buffer_in_ram == NULL ||
-       save->prim_store->prims == NULL)
-      handle_out_of_memory(ctx);
 }
 
 struct vertex_key {
@@ -546,7 +523,6 @@ compile_vertex_list(struct gl_context *ctx)
    if (!node)
       return;
 
-   memset(node, 0, sizeof(struct vbo_save_vertex_list));
    node->cold = calloc(1, sizeof(*node->cold));
 
    /* Make sure the pointer is aligned to the size of a pointer */
@@ -582,7 +558,7 @@ compile_vertex_list(struct gl_context *ctx)
                    current_size * sizeof(GLfloat));
          } else {
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "Current value allocation");
-            handle_out_of_memory(ctx);
+            save->out_of_memory = true;
          }
       }
    }
@@ -621,10 +597,11 @@ compile_vertex_list(struct gl_context *ctx)
    node->cold->min_index = node->cold->prims[0].start;
    node->cold->max_index = end - 1;
 
-   int max_index_count = total_vert_count * 2;
-
-   int size = max_index_count * sizeof(uint32_t);
-   uint32_t* indices = (uint32_t*) malloc(size);
+   /* converting primitive types may result in many more indices */
+   bool all_prims_supported = (ctx->Const.DriverSupportedPrimMask & BITFIELD_MASK(PIPE_PRIM_MAX)) == BITFIELD_MASK(PIPE_PRIM_MAX);
+   int max_index_count = total_vert_count * (all_prims_supported ? 2 : 3);
+   uint32_t* indices = (uint32_t*) malloc(max_index_count * sizeof(uint32_t));
+   void *tmp_indices = all_prims_supported ? NULL : malloc(max_index_count * sizeof(uint32_t));
    struct _mesa_prim *merged_prims = NULL;
 
    int idx = 0;
@@ -646,16 +623,41 @@ compile_vertex_list(struct gl_context *ctx)
    for (unsigned i = 0; i < node->cold->prim_count; i++) {
       assert(original_prims[i].basevertex == 0);
       GLubyte mode = original_prims[i].mode;
+      bool converted_prim = false;
+      unsigned index_size;
 
       int vertex_count = original_prims[i].count;
       if (!vertex_count) {
          continue;
       }
 
+      /* Increase indices storage if the original estimation was too small. */
+      if (idx + 3 * vertex_count > max_index_count) {
+         max_index_count = max_index_count + 3 * vertex_count;
+         indices = (uint32_t*) realloc(indices, max_index_count * sizeof(uint32_t));
+         tmp_indices = all_prims_supported ? NULL : realloc(tmp_indices, max_index_count * sizeof(uint32_t));
+      }
+
       /* Line strips may get converted to lines */
       if (mode == GL_LINE_STRIP)
          mode = GL_LINES;
 
+      if (!(ctx->Const.DriverSupportedPrimMask & BITFIELD_BIT(mode))) {
+         unsigned new_count;
+         u_generate_func trans_func;
+         enum pipe_prim_type pmode = (enum pipe_prim_type)mode;
+         u_index_generator(ctx->Const.DriverSupportedPrimMask,
+                           pmode, original_prims[i].start, vertex_count,
+                           PV_LAST, PV_LAST,
+                           &pmode, &index_size, &new_count,
+                           &trans_func);
+         if (new_count > 0)
+            trans_func(original_prims[i].start, new_count, tmp_indices);
+         vertex_count = new_count;
+         mode = (GLubyte)pmode;
+         converted_prim = true;
+      }
+
       /* If 2 consecutive prims use the same mode => merge them. */
       bool merge_prims = last_valid_prim >= 0 &&
                          mode == merged_prims[last_valid_prim].mode &&
@@ -663,6 +665,8 @@ compile_vertex_list(struct gl_context *ctx)
                          mode != GL_QUAD_STRIP && mode != GL_POLYGON &&
                          mode != GL_PATCHES;
 
+/* index generation uses uint16_t if the index count is small enough */
+#define CAST_INDEX(BASE, SIZE, IDX) ((SIZE == 2 ? (uint32_t)(((uint16_t*)BASE)[IDX]) : ((uint32_t*)BASE)[IDX]))
       /* To be able to merge consecutive triangle strips we need to insert
        * a degenerate triangle.
        */
@@ -673,14 +677,16 @@ compile_vertex_list(struct gl_context *ctx)
          unsigned tri_count = merged_prims[last_valid_prim].count - 2;
 
          indices[idx] = indices[idx - 1];
-         indices[idx + 1] = add_vertex(save, vertex_to_index, original_prims[i].start,
+         indices[idx + 1] = add_vertex(save, vertex_to_index,
+                                       converted_prim ? CAST_INDEX(tmp_indices, index_size, 0) : original_prims[i].start,
                                        temp_vertices_buffer, &max_index);
          idx += 2;
          merged_prims[last_valid_prim].count += 2;
 
          if (tri_count % 2) {
             /* Add another index to preserve winding order */
-            indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start,
+            indices[idx++] = add_vertex(save, vertex_to_index,
+                                        converted_prim ? CAST_INDEX(tmp_indices, index_size, 0) : original_prims[i].start,
                                         temp_vertices_buffer, &max_index);
             merged_prims[last_valid_prim].count++;
          }
@@ -698,24 +704,40 @@ compile_vertex_list(struct gl_context *ctx)
             (original_prims[i + 1].mode == GL_LINE_STRIP ||
              original_prims[i + 1].mode == GL_LINES)))) {
          for (unsigned j = 0; j < vertex_count; j++) {
-            indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j,
+            indices[idx++] = add_vertex(save, vertex_to_index,
+                                        converted_prim ? CAST_INDEX(tmp_indices, index_size, j) : original_prims[i].start + j,
                                         temp_vertices_buffer, &max_index);
             /* Repeat all but the first/last indices. */
             if (j && j != vertex_count - 1) {
-               indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j,
+               indices[idx++] = add_vertex(save, vertex_to_index,
+                                           converted_prim ? CAST_INDEX(tmp_indices, index_size, j) : original_prims[i].start + j,
                                            temp_vertices_buffer, &max_index);
             }
          }
       } else {
          /* We didn't convert to LINES, so restore the original mode */
-         mode = original_prims[i].mode;
+         if (!converted_prim)
+            mode = original_prims[i].mode;
 
          for (unsigned j = 0; j < vertex_count; j++) {
-            indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j,
+            indices[idx++] = add_vertex(save, vertex_to_index,
+                                        converted_prim ? CAST_INDEX(tmp_indices, index_size, j) : original_prims[i].start + j,
                                         temp_vertices_buffer, &max_index);
          }
       }
 
+      /* Duplicate the last vertex for incomplete primitives */
+      if (vertex_count > 0) {
+         unsigned min_vert = u_prim_vertex_count(mode)->min;
+         for (unsigned j = vertex_count; j < min_vert; j++) {
+            indices[idx++] = add_vertex(save, vertex_to_index,
+                                       converted_prim ? CAST_INDEX(tmp_indices, index_size, vertex_count - 1) :
+                                                         original_prims[i].start + vertex_count - 1,
+                                       temp_vertices_buffer, &max_index);
+         }
+      }
+
+#undef CAST_INDEX
       if (merge_prims) {
          /* Update vertex count. */
          merged_prims[last_valid_prim].count += idx - start;
@@ -729,9 +751,10 @@ compile_vertex_list(struct gl_context *ctx)
          merged_prims[last_valid_prim].count = idx - start;
       }
       merged_prims[last_valid_prim].mode = mode;
-   }
 
-   assert(idx > 0 && idx <= max_index_count);
+      /* converted prims will filter incomplete primitives and may have no indices */
+      assert((idx > 0 || converted_prim) && idx <= max_index_count);
+   }
 
    unsigned merged_prim_count = last_valid_prim + 1;
    node->cold->ib.ptr = NULL;
@@ -761,17 +784,18 @@ compile_vertex_list(struct gl_context *ctx)
    if (total_bytes_needed > available_bytes) {
       if (save->current_bo)
          _mesa_reference_buffer_object(ctx, &save->current_bo, NULL);
-      save->current_bo = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
-      bool success = ctx->Driver.BufferData(ctx,
-                                            GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                            MAX2(total_bytes_needed, VBO_SAVE_BUFFER_SIZE),
-                                            NULL,
-                                            GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
-                                            save->current_bo);
+      save->current_bo = _mesa_bufferobj_alloc(ctx, VBO_BUF_ID + 1);
+      bool success = _mesa_bufferobj_data(ctx,
+                                          GL_ELEMENT_ARRAY_BUFFER_ARB,
+                                          MAX2(total_bytes_needed, VBO_SAVE_BUFFER_SIZE),
+                                          NULL,
+                                          GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT |
+                                          MESA_GALLIUM_VERTEX_STATE_STORAGE,
+                                          save->current_bo);
       if (!success) {
          _mesa_reference_buffer_object(ctx, &save->current_bo, NULL);
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
-         handle_out_of_memory(ctx);
+         save->out_of_memory = true;
       } else {
          save->current_bo_bytes_used = 0;
          available_bytes = save->current_bo->Size;
@@ -816,33 +840,36 @@ compile_vertex_list(struct gl_context *ctx)
    _mesa_reference_buffer_object(ctx, &node->cold->ib.obj, save->current_bo);
 
    /* Upload the vertices first (see buffer_offset) */
-   ctx->Driver.BufferSubData(ctx,
-                             save->current_bo_bytes_used,
-                             total_vert_count * save->vertex_size * sizeof(fi_type),
-                             vertex_to_index ? temp_vertices_buffer : save->vertex_store->buffer_in_ram,
-                             node->cold->ib.obj);
+   _mesa_bufferobj_subdata(ctx,
+                           save->current_bo_bytes_used,
+                           total_vert_count * save->vertex_size * sizeof(fi_type),
+                           vertex_to_index ? temp_vertices_buffer : save->vertex_store->buffer_in_ram,
+                           node->cold->ib.obj);
    save->current_bo_bytes_used += total_vert_count * save->vertex_size * sizeof(fi_type);
+   node->cold->bo_bytes_used = save->current_bo_bytes_used;
 
   if (vertex_to_index) {
       _mesa_hash_table_destroy(vertex_to_index, _free_entry);
       free(temp_vertices_buffer);
    }
 
-   /* Since we're append the indices to an existing buffer, we need to adjust the start value of each
+   /* Since we append the indices to an existing buffer, we need to adjust the start value of each
     * primitive (not the indices themselves). */
-   save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used;
-   int indices_offset = save->current_bo_bytes_used / 4;
-   for (int i = 0; i < merged_prim_count; i++) {
-      merged_prims[i].start += indices_offset;
+   if (!ctx->ListState.Current.UseLoopback) {
+      save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used;
+      int indices_offset = save->current_bo_bytes_used / 4;
+      for (int i = 0; i < merged_prim_count; i++) {
+         merged_prims[i].start += indices_offset;
+      }
    }
 
    /* Then upload the indices. */
    if (node->cold->ib.obj) {
-      ctx->Driver.BufferSubData(ctx,
-                                save->current_bo_bytes_used,
-                                idx * sizeof(uint32_t),
-                                indices,
-                                node->cold->ib.obj);
+      _mesa_bufferobj_subdata(ctx,
+                              save->current_bo_bytes_used,
+                              idx * sizeof(uint32_t),
+                              indices,
+                              node->cold->ib.obj);
       save->current_bo_bytes_used += idx * sizeof(uint32_t);
    } else {
       node->cold->vertex_count = 0;
@@ -850,56 +877,59 @@ compile_vertex_list(struct gl_context *ctx)
    }
 
    /* Prepare for DrawGallium */
-   memset(&node->merged.info, 0, sizeof(struct pipe_draw_info));
+   memset(&node->cold->info, 0, sizeof(struct pipe_draw_info));
    /* The other info fields will be updated in vbo_save_playback_vertex_list */
-   node->merged.info.index_size = 4;
-   node->merged.info.instance_count = 1;
-   node->merged.info.index.gl_bo = node->cold->ib.obj;
+   node->cold->info.index_size = 4;
+   node->cold->info.instance_count = 1;
+   node->cold->info.index.gl_bo = node->cold->ib.obj;
    if (merged_prim_count == 1) {
-      node->merged.info.mode = merged_prims[0].mode;
-      node->merged.start_count.start = merged_prims[0].start;
-      node->merged.start_count.count = merged_prims[0].count;
-      node->merged.start_count.index_bias = 0;
-      node->merged.mode = NULL;
+      node->cold->info.mode = merged_prims[0].mode;
+      node->start_count.start = merged_prims[0].start;
+      node->start_count.count = merged_prims[0].count;
+      node->start_count.index_bias = 0;
+      node->modes = NULL;
    } else {
-      node->merged.mode = malloc(merged_prim_count * sizeof(unsigned char));
-      node->merged.start_counts = malloc(merged_prim_count * sizeof(struct pipe_draw_start_count_bias));
+      node->modes = malloc(merged_prim_count * sizeof(unsigned char));
+      node->start_counts = malloc(merged_prim_count * sizeof(struct pipe_draw_start_count_bias));
       for (unsigned i = 0; i < merged_prim_count; i++) {
-         node->merged.start_counts[i].start = merged_prims[i].start;
-         node->merged.start_counts[i].count = merged_prims[i].count;
-         node->merged.start_counts[i].index_bias = 0;
-         node->merged.mode[i] = merged_prims[i].mode;
+         node->start_counts[i].start = merged_prims[i].start;
+         node->start_counts[i].count = merged_prims[i].count;
+         node->start_counts[i].index_bias = 0;
+         node->modes[i] = merged_prims[i].mode;
       }
    }
-   node->merged.num_draws = merged_prim_count;
-   if (node->merged.num_draws > 1) {
+   node->num_draws = merged_prim_count;
+   if (node->num_draws > 1) {
       bool same_mode = true;
-      for (unsigned i = 1; i < node->merged.num_draws && same_mode; i++) {
-         same_mode = node->merged.mode[i] == node->merged.mode[0];
+      for (unsigned i = 1; i < node->num_draws && same_mode; i++) {
+         same_mode = node->modes[i] == node->modes[0];
       }
       if (same_mode) {
          /* All primitives use the same mode, so we can simplify a bit */
-         node->merged.info.mode = node->merged.mode[0];
-         free(node->merged.mode);
-         node->merged.mode = NULL;
+         node->cold->info.mode = node->modes[0];
+         free(node->modes);
+         node->modes = NULL;
       }
    }
 
    free(indices);
+   free(tmp_indices);
    free(merged_prims);
 
 end:
+   node->draw_begins = node->cold->prims[0].begin;
 
    if (!save->current_bo) {
-      save->current_bo = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
-      bool success = ctx->Driver.BufferData(ctx,
-                                            GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                            VBO_SAVE_BUFFER_SIZE,
-                                            NULL,
-                                            GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
-                                            save->current_bo);
+      save->current_bo = _mesa_bufferobj_alloc(ctx, VBO_BUF_ID + 1);
+      bool success = _mesa_bufferobj_data(ctx,
+                                          GL_ELEMENT_ARRAY_BUFFER_ARB,
+                                          VBO_SAVE_BUFFER_SIZE,
+                                          NULL,
+                                          GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT |
+                                          MESA_GALLIUM_VERTEX_STATE_STORAGE,
+                                          save->current_bo);
       if (!success)
-         handle_out_of_memory(ctx);
+         save->out_of_memory = true;
    }
 
    GLuint offsets[VBO_ATTRIB_MAX];
@@ -916,37 +946,45 @@ end:
                  save->current_bo, buffer_offset, stride,
                  save->enabled, save->attrsz, save->attrtype, offsets);
       /* Reference the vao in the dlist */
-      node->VAO[vpm] = NULL;
-      _mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
-   }
+      node->cold->VAO[vpm] = NULL;
+      _mesa_reference_vao(ctx, &node->cold->VAO[vpm], save->VAO[vpm]);
+   }
+
+   /* Prepare for DrawGalliumVertexState */
+   if (node->num_draws && ctx->Driver.DrawGalliumVertexState) {
+      for (unsigned i = 0; i < VP_MODE_MAX; i++) {
+         uint32_t enabled_attribs = _vbo_get_vao_filter(i) &
+                                    node->cold->VAO[i]->_EnabledWithMapMode;
+
+         node->state[i] =
+            ctx->Driver.CreateGalliumVertexState(ctx, node->cold->VAO[i],
+                                                 node->cold->ib.obj,
+                                                 enabled_attribs);
+         node->private_refcount[i] = 0;
+         node->enabled_attribs[i] = enabled_attribs;
+      }
 
+      node->ctx = ctx;
+      node->mode = node->cold->info.mode;
+      assert(node->cold->info.index_size == 4);
+   }
 
    /* Deal with GL_COMPILE_AND_EXECUTE:
     */
    if (ctx->ExecuteFlag) {
-      struct _glapi_table *dispatch = GET_DISPATCH();
-
-      _glapi_set_dispatch(ctx->Exec);
-
       /* _vbo_loopback_vertex_list doesn't use the index buffer, so we have to
-       * use buffer_in_ram instead of current_bo which contains all vertices instead
-       * of the deduplicated vertices only in the !UseLoopback case.
+       * use buffer_in_ram (which contains all vertices) instead of current_bo
+       * (which contains deduplicated vertices *when* UseLoopback is false).
        *
        * The problem is that the VAO offset is based on current_bo's layout,
        * so we have to use a temp value.
        */
-      struct gl_vertex_array_object *vao = node->VAO[VP_MODE_SHADER];
+      struct gl_vertex_array_object *vao = node->cold->VAO[VP_MODE_SHADER];
       GLintptr original = vao->BufferBinding[0].Offset;
-      if (!ctx->ListState.Current.UseLoopback) {
-         GLintptr new_offset = 0;
-         /* 'start_offset' has been added to all primitives 'start', so undo it here. */
-         new_offset -= start_offset * stride;
-         vao->BufferBinding[0].Offset = new_offset;
-      }
+      /* 'start_offset' has been added to all primitives 'start', so undo it here. */
+      vao->BufferBinding[0].Offset = -(GLintptr)(start_offset * stride);
       _vbo_loopback_vertex_list(ctx, node, save->vertex_store->buffer_in_ram);
       vao->BufferBinding[0].Offset = original;
-
-      _glapi_set_dispatch(dispatch);
    }
 
    /* Reset our structures for the next run of vertices:
@@ -1134,6 +1172,7 @@ upgrade_vertex(struct gl_context *ctx, GLuint attr, GLuint newsz)
    if (save->copied.nr) {
       assert(save->copied.buffer);
       const fi_type *data = save->copied.buffer;
+      grow_vertex_storage(ctx, save->copied.nr);
       fi_type *dest = save->vertex_store->buffer_in_ram;
 
       /* Need to note this and fix up at runtime (or loopback):
@@ -1149,20 +1188,34 @@ upgrade_vertex(struct gl_context *ctx, GLuint attr, GLuint newsz)
             const int j = u_bit_scan64(&enabled);
             assert(save->attrsz[j]);
             if (j == attr) {
-               if (oldsz) {
-                  COPY_CLEAN_4V_TYPE_AS_UNION(dest, oldsz, data,
-                                              save->attrtype[j]);
-                  data += oldsz;
-                  dest += newsz;
+               int k;
+               const fi_type *src = oldsz ? data : save->current[attr];
+               int copy = oldsz ? oldsz : newsz;
+               for (k = 0; k < copy; k++)
+                  dest[k] = src[k];
+               for (; k < newsz; k++) {
+                  switch (save->attrtype[j]) {
+                     case GL_FLOAT:
+                        dest[k] = FLOAT_AS_UNION(k == 3);
+                        break;
+                     case GL_INT:
+                        dest[k] = INT_AS_UNION(k == 3);
+                        break;
+                     case GL_UNSIGNED_INT:
+                        dest[k] = UINT_AS_UNION(k == 3);
+                        break;
+                     default:
+                        dest[k] = FLOAT_AS_UNION(k == 3);
+                        assert(!"Unexpected type in upgrade_vertex");
+                        break;
+                  }
                }
-               else {
-                  COPY_SZ_4V(dest, newsz, save->current[attr]);
-                  dest += newsz;
-               }
-            }
-            else {
+               dest += newsz;
+               data += oldsz;
+            } else {
                GLint sz = save->attrsz[j];
-               COPY_SZ_4V(dest, sz, data);
+               for (int k = 0; k < sz; k++)
+                  dest[k] = data[k];
                data += sz;
                dest += sz;
             }
@@ -1206,6 +1259,8 @@ fixup_vertex(struct gl_context *ctx, GLuint attr,
    }
 
    save->active_sz[attr] = sz;
+
+   grow_vertex_storage(ctx, 1);
 }
 
 
@@ -1254,36 +1309,39 @@ is_vertex_position(const struct gl_context *ctx, GLuint index)
  * 3f version won't otherwise set color[3] to 1.0 -- this is the job
  * of the chooser function when switching between Color4f and Color3f.
  */
-#define ATTR_UNION(A, N, T, C, V0, V1, V2, V3)                 \
-do {                                                           \
-   struct vbo_save_context *save = &vbo_context(ctx)->save;    \
-   int sz = (sizeof(C) / sizeof(GLfloat));                     \
-                                                               \
-   if (save->active_sz[A] != N)                                        \
-      fixup_vertex(ctx, A, N * sz, T);                         \
-                                                               \
-   {                                                           \
+#define ATTR_UNION(A, N, T, C, V0, V1, V2, V3)                  \
+do {                                                            \
+   struct vbo_save_context *save = &vbo_context(ctx)->save;     \
+   int sz = (sizeof(C) / sizeof(GLfloat));                      \
+                                                                \
+   if (save->active_sz[A] != N)                                 \
+      fixup_vertex(ctx, A, N * sz, T);                          \
+                                                                \
+   {                                                            \
       C *dest = (C *)save->attrptr[A];                          \
-      if (N>0) dest[0] = V0;                                   \
-      if (N>1) dest[1] = V1;                                   \
-      if (N>2) dest[2] = V2;                                   \
-      if (N>3) dest[3] = V3;                                   \
-      save->attrtype[A] = T;                                   \
-   }                                                           \
-                                                               \
-   if ((A) == 0) {                                             \
-      GLuint i;                                                        \
-      fi_type *buffer_ptr = save->vertex_store->buffer_in_ram + save->vertex_store->used; \
-                                                               \
-      for (i = 0; i < save->vertex_size; i++)                  \
-            buffer_ptr[i] = save->vertex[i];                   \
-                                                               \
-      save->vertex_store->used += save->vertex_size; \
-      if ((save->vertex_store->used + save->vertex_size) * sizeof(float) >= save->vertex_store->buffer_in_ram_size) { \
-             realloc_storage(ctx, -1, get_vertex_count(save) * 2); \
-         assert((save->vertex_store->used + save->vertex_size) * sizeof(float) < save->vertex_store->buffer_in_ram_size); \
-      } \
-   }                                                           \
+      if (N>0) dest[0] = V0;                                    \
+      if (N>1) dest[1] = V1;                                    \
+      if (N>2) dest[2] = V2;                                    \
+      if (N>3) dest[3] = V3;                                    \
+      save->attrtype[A] = T;                                    \
+   }                                                            \
+                                                                \
+   if ((A) == VBO_ATTRIB_POS) {                                 \
+      fi_type *buffer_ptr = save->vertex_store->buffer_in_ram + \
+                            save->vertex_store->used;           \
+                                                                \
+      for (int i = 0; i < save->vertex_size; i++)               \
+        buffer_ptr[i] = save->vertex[i];                        \
+                                                                \
+      save->vertex_store->used += save->vertex_size;            \
+      unsigned used_next = (save->vertex_store->used +          \
+                            save->vertex_size) * sizeof(float); \
+      if (used_next > save->vertex_store->buffer_in_ram_size) { \
+         grow_vertex_storage(ctx, get_vertex_count(save));      \
+         assert(used_next <=                                    \
+                save->vertex_store->buffer_in_ram_size);        \
+      }                                                         \
+   }                                                            \
 } while (0)
 
 #define TAG(x) _save_##x
@@ -1291,13 +1349,12 @@ do {                                                            \
 #include "vbo_attrib_tmp.h"
 
 
-
-#define MAT( ATTR, N, face, params )                   \
-do {                                                   \
-   if (face != GL_BACK)                                        \
-      MAT_ATTR( ATTR, N, params ); /* front */         \
-   if (face != GL_FRONT)                               \
-      MAT_ATTR( ATTR + 1, N, params ); /* back */      \
+#define MAT( ATTR, N, face, params )                            \
+do {                                                            \
+   if (face != GL_BACK)                                         \
+      MAT_ATTR( ATTR, N, params ); /* front */                  \
+   if (face != GL_FRONT)                                        \
+      MAT_ATTR( ATTR + 1, N, params ); /* back */               \
 } while (0)
 
 
@@ -1350,6 +1407,10 @@ _save_Materialfv(GLenum face, GLenum pname, const GLfloat *params)
 }
 
 
+static void
+vbo_init_dispatch_save_begin_end(struct gl_context *ctx);
+
+
 /* Cope with EvalCoord/CallList called within a begin/end object:
  *     -- Flush current buffer
  *     -- Fallback to opcodes for the rest of the begin/end object.
@@ -1381,10 +1442,10 @@ dlist_fallback(struct gl_context *ctx)
    copy_to_current(ctx);
    reset_vertex(ctx);
    if (save->out_of_memory) {
-      _mesa_install_save_vtxfmt(ctx, &save->vtxfmt);
+      vbo_install_save_vtxfmt_noop(ctx);
    }
    else {
-      _mesa_install_save_vtxfmt(ctx, &ctx->ListState.ListVtxfmt);
+      _mesa_init_dispatch_save_begin_end(ctx);
    }
    ctx->Driver.SaveNeedFlush = GL_FALSE;
 }
@@ -1469,7 +1530,9 @@ vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode,
 
    ctx->Driver.CurrentSavePrimitive = mode;
 
-   assert(i < save->prim_store->size);
+   if (!save->prim_store || i >= save->prim_store->size) {
+      save->prim_store = realloc_prim_store(save->prim_store, i * 2);
+   }
    save->prim_store->prims[i].mode = mode & VBO_SAVE_PRIM_MODE_MASK;
    save->prim_store->prims[i].begin = 1;
    save->prim_store->prims[i].end = 0;
@@ -1478,7 +1541,7 @@ vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode,
 
    save->no_current_update = no_current_update;
 
-   _mesa_install_save_vtxfmt(ctx, &save->vtxfmt);
+   vbo_init_dispatch_save_begin_end(ctx);
 
    /* We need to call vbo_save_SaveFlushVertices() if there's state change */
    ctx->Driver.SaveNeedFlush = GL_TRUE;
@@ -1496,20 +1559,15 @@ _save_End(void)
    save->prim_store->prims[i].end = 1;
    save->prim_store->prims[i].count = (get_vertex_count(save) - save->prim_store->prims[i].start);
 
-   if (i == (GLint) save->prim_store->size - 1) {
-      compile_vertex_list(ctx);
-      assert(save->copied.nr == 0);
-   }
-
    /* Swap out this vertex format while outside begin/end.  Any color,
     * etc. received between here and the next begin will be compiled
     * as opcodes.
     */
    if (save->out_of_memory) {
-      _mesa_install_save_vtxfmt(ctx, &save->vtxfmt);
+      vbo_install_save_vtxfmt_noop(ctx);
    }
    else {
-      _mesa_install_save_vtxfmt(ctx, &ctx->ListState.ListVtxfmt);
+      _mesa_init_dispatch_save_begin_end(ctx);
    }
 }
 
@@ -1547,13 +1605,8 @@ _save_PrimitiveRestartNV(void)
 }
 
 
-/* Unlike the functions above, these are to be hooked into the vtxfmt
- * maintained in ctx->ListState, active when the list is known or
- * suspected to be outside any begin/end primitive.
- * Note: OBE = Outside Begin/End
- */
-static void GLAPIENTRY
-_save_OBE_Rectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2)
+void GLAPIENTRY
+save_Rectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct _glapi_table *dispatch = ctx->CurrentServerDispatch;
@@ -1567,65 +1620,44 @@ _save_OBE_Rectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2)
 }
 
 
-static void GLAPIENTRY
-_save_OBE_Rectd(GLdouble x1, GLdouble y1, GLdouble x2, GLdouble y2)
+void GLAPIENTRY
+save_Rectdv(const GLdouble *v1, const GLdouble *v2)
 {
-   _save_OBE_Rectf((GLfloat) x1, (GLfloat) y1, (GLfloat) x2, (GLfloat) y2);
+   save_Rectf((GLfloat) v1[0], (GLfloat) v1[1], (GLfloat) v2[0], (GLfloat) v2[1]);
 }
 
-static void GLAPIENTRY
-_save_OBE_Rectdv(const GLdouble *v1, const GLdouble *v2)
+void GLAPIENTRY
+save_Rectfv(const GLfloat *v1, const GLfloat *v2)
 {
-   _save_OBE_Rectf((GLfloat) v1[0], (GLfloat) v1[1], (GLfloat) v2[0], (GLfloat) v2[1]);
+   save_Rectf(v1[0], v1[1], v2[0], v2[1]);
 }
 
-static void GLAPIENTRY
-_save_OBE_Rectfv(const GLfloat *v1, const GLfloat *v2)
+void GLAPIENTRY
+save_Recti(GLint x1, GLint y1, GLint x2, GLint y2)
 {
-   _save_OBE_Rectf(v1[0], v1[1], v2[0], v2[1]);
+   save_Rectf((GLfloat) x1, (GLfloat) y1, (GLfloat) x2, (GLfloat) y2);
 }
 
-static void GLAPIENTRY
-_save_OBE_Recti(GLint x1, GLint y1, GLint x2, GLint y2)
+void GLAPIENTRY
+save_Rectiv(const GLint *v1, const GLint *v2)
 {
-   _save_OBE_Rectf((GLfloat) x1, (GLfloat) y1, (GLfloat) x2, (GLfloat) y2);
+   save_Rectf((GLfloat) v1[0], (GLfloat) v1[1], (GLfloat) v2[0], (GLfloat) v2[1]);
 }
 
-static void GLAPIENTRY
-_save_OBE_Rectiv(const GLint *v1, const GLint *v2)
+void GLAPIENTRY
+save_Rects(GLshort x1, GLshort y1, GLshort x2, GLshort y2)
 {
-   _save_OBE_Rectf((GLfloat) v1[0], (GLfloat) v1[1], (GLfloat) v2[0], (GLfloat) v2[1]);
+   save_Rectf((GLfloat) x1, (GLfloat) y1, (GLfloat) x2, (GLfloat) y2);
 }
 
-static void GLAPIENTRY
-_save_OBE_Rects(GLshort x1, GLshort y1, GLshort x2, GLshort y2)
+void GLAPIENTRY
+save_Rectsv(const GLshort *v1, const GLshort *v2)
 {
-   _save_OBE_Rectf((GLfloat) x1, (GLfloat) y1, (GLfloat) x2, (GLfloat) y2);
+   save_Rectf((GLfloat) v1[0], (GLfloat) v1[1], (GLfloat) v2[0], (GLfloat) v2[1]);
 }
 
-static void GLAPIENTRY
-_save_OBE_Rectsv(const GLshort *v1, const GLshort *v2)
-{
-   _save_OBE_Rectf((GLfloat) v1[0], (GLfloat) v1[1], (GLfloat) v2[0], (GLfloat) v2[1]);
-}
-
-static void
-_ensure_draws_fits_in_storage(struct gl_context *ctx, int primcount, int vertcount)
-{
-   struct vbo_save_context *save = &vbo_context(ctx)->save;
-
-   bool realloc_prim = save->prim_store->used + primcount > save->prim_store->size;
-   bool realloc_vert = save->vertex_size &&
-      (save->vertex_store->used + vertcount * save->vertex_size) >=
-         save->vertex_store->buffer_in_ram_size;
-
-   if (realloc_prim || realloc_vert)
-      realloc_storage(ctx, realloc_prim ? primcount : -1, realloc_vert ? vertcount : -1);
-}
-
-
-static void GLAPIENTRY
-_save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
+void GLAPIENTRY
+save_DrawArrays(GLenum mode, GLint start, GLsizei count)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct gl_vertex_array_object *vao = ctx->Array.VAO;
@@ -1644,7 +1676,7 @@ _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
    if (save->out_of_memory)
       return;
 
-   _ensure_draws_fits_in_storage(ctx, 1, count);
+   grow_vertex_storage(ctx, count);
 
    /* Make sure to process any VBO binding changes */
    _mesa_update_state(ctx);
@@ -1661,9 +1693,9 @@ _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
 }
 
 
-static void GLAPIENTRY
-_save_OBE_MultiDrawArrays(GLenum mode, const GLint *first,
-                          const GLsizei *count, GLsizei primcount)
+void GLAPIENTRY
+save_MultiDrawArrays(GLenum mode, const GLint *first,
+                      const GLsizei *count, GLsizei primcount)
 {
    GET_CURRENT_CONTEXT(ctx);
    GLint i;
@@ -1689,11 +1721,11 @@ _save_OBE_MultiDrawArrays(GLenum mode, const GLint *first,
       vertcount += count[i];
    }
 
-   _ensure_draws_fits_in_storage(ctx, primcount, vertcount);
+   grow_vertex_storage(ctx, vertcount);
 
    for (i = 0; i < primcount; i++) {
       if (count[i] > 0) {
-         _save_OBE_DrawArrays(mode, first[i], count[i]);
+         save_DrawArrays(mode, first[i], count[i]);
       }
    }
 }
@@ -1725,9 +1757,9 @@ array_element(struct gl_context *ctx,
 /* Could do better by copying the arrays and element list intact and
  * then emitting an indexed prim at runtime.
  */
-static void GLAPIENTRY
-_save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
-                                 const GLvoid * indices, GLint basevertex)
+void GLAPIENTRY
+save_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
+                             const GLvoid * indices, GLint basevertex)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct vbo_save_context *save = &vbo_context(ctx)->save;
@@ -1753,7 +1785,7 @@ _save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
    if (save->out_of_memory)
       return;
 
-   _ensure_draws_fits_in_storage(ctx, 1, count);
+   grow_vertex_storage(ctx, count);
 
    /* Make sure to process any VBO binding changes */
    _mesa_update_state(ctx);
@@ -1789,16 +1821,16 @@ _save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
    _mesa_vao_unmap(ctx, vao);
 }
 
-static void GLAPIENTRY
-_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
-                       const GLvoid * indices)
+void GLAPIENTRY
+save_DrawElements(GLenum mode, GLsizei count, GLenum type,
+                   const GLvoid * indices)
 {
-   _save_OBE_DrawElementsBaseVertex(mode, count, type, indices, 0);
+   save_DrawElementsBaseVertex(mode, count, type, indices, 0);
 }
 
 
-static void GLAPIENTRY
-_save_OBE_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
+void GLAPIENTRY
+save_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
                             GLsizei count, GLenum type,
                             const GLvoid * indices)
 {
@@ -1829,13 +1861,28 @@ _save_OBE_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
    if (save->out_of_memory)
       return;
 
-   _save_OBE_DrawElements(mode, count, type, indices);
+   save_DrawElements(mode, count, type, indices);
 }
 
+void GLAPIENTRY
+save_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end,
+                                 GLsizei count, GLenum type,
+                                 const GLvoid *indices, GLint basevertex)
+{
+   GET_CURRENT_CONTEXT(ctx);
 
-static void GLAPIENTRY
-_save_OBE_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type,
-                            const GLvoid * const *indices, GLsizei primcount)
+   if (end < start) {
+      _mesa_compile_error(ctx, GL_INVALID_VALUE,
+                          "glDrawRangeElementsBaseVertex(end < start)");
+      return;
+   }
+
+   save_DrawElementsBaseVertex(mode, count, type, indices, basevertex);
+}
+
+void GLAPIENTRY
+save_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type,
+                       const GLvoid * const *indices, GLsizei primcount)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct _glapi_table *dispatch = ctx->CurrentServerDispatch;
@@ -1845,22 +1892,22 @@ _save_OBE_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type,
    for (i = 0; i < primcount; i++) {
       vertcount += count[i];
    }
-   _ensure_draws_fits_in_storage(ctx, primcount, vertcount);
+   grow_vertex_storage(ctx, vertcount);
 
    for (i = 0; i < primcount; i++) {
       if (count[i] > 0) {
-        CALL_DrawElements(dispatch, (mode, count[i], type, indices[i]));
+         CALL_DrawElements(dispatch, (mode, count[i], type, indices[i]));
       }
    }
 }
 
 
-static void GLAPIENTRY
-_save_OBE_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count,
-                                      GLenum type,
-                                      const GLvoid * const *indices,
-                                      GLsizei primcount,
-                                      const GLint *basevertex)
+void GLAPIENTRY
+save_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count,
+                                  GLenum type,
+                                  const GLvoid * const *indices,
+                                  GLsizei primcount,
+                                  const GLint *basevertex)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct _glapi_table *dispatch = ctx->CurrentServerDispatch;
@@ -1870,62 +1917,31 @@ _save_OBE_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count,
    for (i = 0; i < primcount; i++) {
       vertcount += count[i];
    }
-   _ensure_draws_fits_in_storage(ctx, primcount, vertcount);
+   grow_vertex_storage(ctx, vertcount);
 
    for (i = 0; i < primcount; i++) {
       if (count[i] > 0) {
-        CALL_DrawElementsBaseVertex(dispatch, (mode, count[i], type,
-                                                     indices[i],
-                                                     basevertex[i]));
+         CALL_DrawElementsBaseVertex(dispatch, (mode, count[i], type,
+                                     indices[i],
+                                     basevertex[i]));
       }
    }
 }
 
 
 static void
-vtxfmt_init(struct gl_context *ctx)
+vbo_init_dispatch_save_begin_end(struct gl_context *ctx)
 {
-   struct vbo_save_context *save = &vbo_context(ctx)->save;
-   GLvertexformat *vfmt = &save->vtxfmt;
-
-#define NAME_AE(x) _ae_##x
+#define NAME_AE(x) _mesa_##x
 #define NAME_CALLLIST(x) _save_##x
 #define NAME(x) _save_##x
-#define NAME_ES(x) _save_##x##ARB
-
-#include "vbo_init_tmp.h"
-}
+#define NAME_ES(x) _save_##x
 
-
-/**
- * Initialize the dispatch table with the VBO functions for display
- * list compilation.
- */
-void
-vbo_initialize_save_dispatch(const struct gl_context *ctx,
-                             struct _glapi_table *exec)
-{
-   SET_DrawArrays(exec, _save_OBE_DrawArrays);
-   SET_MultiDrawArrays(exec, _save_OBE_MultiDrawArrays);
-   SET_DrawElements(exec, _save_OBE_DrawElements);
-   SET_DrawElementsBaseVertex(exec, _save_OBE_DrawElementsBaseVertex);
-   SET_DrawRangeElements(exec, _save_OBE_DrawRangeElements);
-   SET_MultiDrawElementsEXT(exec, _save_OBE_MultiDrawElements);
-   SET_MultiDrawElementsBaseVertex(exec, _save_OBE_MultiDrawElementsBaseVertex);
-   SET_Rectf(exec, _save_OBE_Rectf);
-   SET_Rectd(exec, _save_OBE_Rectd);
-   SET_Rectdv(exec, _save_OBE_Rectdv);
-   SET_Rectfv(exec, _save_OBE_Rectfv);
-   SET_Recti(exec, _save_OBE_Recti);
-   SET_Rectiv(exec, _save_OBE_Rectiv);
-   SET_Rects(exec, _save_OBE_Rects);
-   SET_Rectsv(exec, _save_OBE_Rectsv);
-
-   /* Note: other glDraw functins aren't compiled into display lists */
+   struct _glapi_table *tab = ctx->Save;
+   #include "api_beginend_init.h"
 }
 
 
-
 void
 vbo_save_SaveFlushVertices(struct gl_context *ctx)
 {
@@ -1960,7 +1976,7 @@ vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode)
       save->prim_store = realloc_prim_store(NULL, 8);
 
    if (!save->vertex_store)
-      save->vertex_store = realloc_vertex_store(NULL, save->vertex_size, 8);
+      save->vertex_store = CALLOC_STRUCT(vbo_save_vertex_store);
 
    reset_vertex(ctx);
    ctx->Driver.SaveNeedFlush = GL_FALSE;
@@ -1995,7 +2011,7 @@ vbo_save_EndList(struct gl_context *ctx)
        * etc. received between here and the next begin will be compiled
        * as opcodes.
        */
-      _mesa_install_save_vtxfmt(ctx, &ctx->ListState.ListVtxfmt);
+      _mesa_init_dispatch_save_begin_end(ctx);
    }
 
    assert(save->vertex_size == 0);
@@ -2034,6 +2050,5 @@ vbo_save_api_init(struct vbo_save_context *save)
 {
    struct gl_context *ctx = gl_context_from_vbo_save(save);
 
-   vtxfmt_init(ctx);
    current_init(ctx);
 }