glthread: use signed vertex buffer offsets when available, don't require them
authorMike Blumenkrantz <michael.blumenkrantz@gmail.com>
Tue, 27 Sep 2022 22:26:49 +0000 (18:26 -0400)
committerMarge Bot <emma+marge@anholt.net>
Thu, 29 Sep 2022 22:05:06 +0000 (22:05 +0000)
this is a great memory-saving optimization for drivers that support it,
but for drivers that can't, it ends up taking slower paths instead
of just consuming more memory

but the year is 2022. we have all the memory. so use more memory if signed
offsets aren't supported

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18856>

src/mesa/main/glthread.c
src/mesa/main/glthread.h
src/mesa/main/glthread_bufferobj.c
src/mesa/main/glthread_draw.c

index b44ff9b..b66e374 100644 (file)
@@ -132,13 +132,6 @@ _mesa_glthread_init(struct gl_context *ctx)
       ctx->Const.BufferCreateMapUnsynchronizedThreadSafe &&
       ctx->Const.AllowMappedBuffersDuringExecution;
 
-   /* If the draw start index is non-zero, glthread can upload to offset 0,
-    * which means the attrib offset has to be -(first * stride).
-    * So require signed vertex buffer offsets.
-    */
-   glthread->SupportsNonVBOUploads = glthread->SupportsBufferUploads &&
-                                     ctx->Const.VertexBufferOffsetIsInt32;
-
    ctx->CurrentClientDispatch = ctx->MarshalExec;
 
    glthread->LastDListChangeBatchIndex = -1;
index efc579a..fbce31f 100644 (file)
@@ -188,7 +188,6 @@ struct glthread_state
 
    /** Caps. */
    GLboolean SupportsBufferUploads;
-   GLboolean SupportsNonVBOUploads;
 
    /** Primitive restart state. */
    bool PrimitiveRestart;
@@ -251,7 +250,8 @@ void _mesa_glthread_finish_before(struct gl_context *ctx, const char *func);
 void _mesa_glthread_upload(struct gl_context *ctx, const void *data,
                            GLsizeiptr size, unsigned *out_offset,
                            struct gl_buffer_object **out_buffer,
-                           uint8_t **out_ptr);
+                           uint8_t **out_ptr,
+                           unsigned start_offset);
 void _mesa_glthread_reset_vao(struct glthread_vao *vao);
 void _mesa_error_glthread_safe(struct gl_context *ctx, GLenum error,
                                bool glthread, const char *format, ...);
index 5745df4..615d3b3 100644 (file)
@@ -66,7 +66,8 @@ void
 _mesa_glthread_upload(struct gl_context *ctx, const void *data,
                       GLsizeiptr size, unsigned *out_offset,
                       struct gl_buffer_object **out_buffer,
-                      uint8_t **out_ptr)
+                      uint8_t **out_ptr,
+                      unsigned start_offset)
 {
    struct glthread_state *glthread = &ctx->GLThread;
    const unsigned default_size = 1024 * 1024;
@@ -75,14 +76,14 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
       return;
 
    /* The alignment was chosen arbitrarily. */
-   unsigned offset = align(glthread->upload_offset, 8);
+   unsigned offset = align(glthread->upload_offset, 8) + start_offset;
 
    /* Allocate a new buffer if needed. */
    if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
       /* If the size is greater than the buffer size, allocate a separate buffer
        * just for this upload.
        */
-      if (unlikely(size > default_size)) {
+      if (unlikely(start_offset + size > default_size)) {
          uint8_t *ptr;
 
          assert(*out_buffer == NULL);
@@ -90,7 +91,8 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
          if (!*out_buffer)
             return;
 
-         *out_offset = 0;
+         ptr += start_offset;
+         *out_offset = start_offset;
          if (data)
             memcpy(ptr, data, size);
          else
@@ -107,7 +109,7 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
       glthread->upload_buffer =
          new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
       glthread->upload_offset = 0;
-      offset = 0;
+      offset = start_offset;
 
       /* Since atomic operations are very very slow when 2 threads are not
        * sharing one L3 cache (which can happen on AMD Zen), prevent using
@@ -430,7 +432,7 @@ _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
       unsigned upload_offset = 0;
 
       _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
-                            NULL);
+                            NULL, 0);
 
       if (upload_buffer) {
          _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
index 030b7be..16eab75 100644 (file)
@@ -69,7 +69,7 @@ upload_indices(struct gl_context *ctx, unsigned count, unsigned index_size,
    assert(count);
 
    _mesa_glthread_upload(ctx, *indices, index_size * count,
-                         &upload_offset, &upload_buffer, NULL);
+                         &upload_offset, &upload_buffer, NULL, 0);
    assert(upload_buffer);
    *indices = (const GLvoid*)(intptr_t)upload_offset;
 
@@ -89,7 +89,7 @@ upload_multi_indices(struct gl_context *ctx, unsigned total_count,
    assert(total_count);
 
    _mesa_glthread_upload(ctx, NULL, index_size * total_count,
-                         &upload_offset, &upload_buffer, &upload_ptr);
+                         &upload_offset, &upload_buffer, &upload_ptr, 0);
    assert(upload_buffer);
 
    for (unsigned i = 0, offset = 0; i < draw_count; i++) {
@@ -188,10 +188,14 @@ upload_vertices(struct gl_context *ctx, unsigned user_buffer_mask,
          end = end_offset[binding_index];
          assert(start < end);
 
+         /* If the draw start index is non-zero, glthread can upload to offset 0,
+         * which means the attrib offset has to be -(first * stride).
+         * So use signed vertex buffer offsets when possible to save memory.
+         */
          const void *ptr = vao->Attrib[binding_index].Pointer;
          _mesa_glthread_upload(ctx, (uint8_t*)ptr + start,
                                end - start, &upload_offset,
-                               &upload_buffer, NULL);
+                               &upload_buffer, NULL, ctx->Const.VertexBufferOffsetIsInt32 ? 0 : start);
          assert(upload_buffer);
 
          buffers[num_buffers].buffer = upload_buffer;
@@ -239,9 +243,14 @@ upload_vertices(struct gl_context *ctx, unsigned user_buffer_mask,
          size = stride * (num_vertices - 1) + element_size;
       }
 
+      /* If the draw start index is non-zero, glthread can upload to offset 0,
+       * which means the attrib offset has to be -(first * stride).
+       * So use signed vertex buffer offsets when possible to save memory.
+       */
       const void *ptr = vao->Attrib[binding_index].Pointer;
       _mesa_glthread_upload(ctx, (uint8_t*)ptr + offset,
-                            size, &upload_offset, &upload_buffer, NULL);
+                            size, &upload_offset, &upload_buffer, NULL,
+                            ctx->Const.VertexBufferOffsetIsInt32 ? 0 : offset);
       assert(upload_buffer);
 
       buffers[num_buffers].buffer = upload_buffer;
@@ -405,7 +414,7 @@ draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count,
 
    /* Upload and draw. */
    struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX];
-   if (!ctx->GLThread.SupportsNonVBOUploads ||
+   if (!ctx->GLThread.SupportsBufferUploads ||
        !upload_vertices(ctx, user_buffer_mask, first, count, baseinstance,
                         instance_count, buffers)) {
       _mesa_glthread_finish_before(ctx, "DrawArrays");
@@ -517,7 +526,7 @@ _mesa_marshal_MultiDrawArrays(GLenum mode, const GLint *first,
    }
 
    /* If the draw count is too high or negative, the queue can't be used. */
-   if (!ctx->GLThread.SupportsNonVBOUploads ||
+   if (!ctx->GLThread.SupportsBufferUploads ||
        draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 16)
       goto sync;
 
@@ -805,7 +814,7 @@ draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices,
       return;
    }
 
-   if (!ctx->GLThread.SupportsNonVBOUploads)
+   if (!ctx->GLThread.SupportsBufferUploads)
       goto sync;
 
    bool need_index_bounds = user_buffer_mask & ~vao->NonZeroDivisorMask;
@@ -1030,7 +1039,7 @@ _mesa_marshal_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count,
     * and index bounds are not valid. We would have to map the indices
     * to compute the index bounds, and for that we would have to sync anyway.
     */
-   if (!ctx->GLThread.SupportsNonVBOUploads ||
+   if (!ctx->GLThread.SupportsBufferUploads ||
        draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 32 ||
        (need_index_bounds && !has_user_indices))
       goto sync;