/** Caps. */
- GLboolean SupportsNonVBOUploads;
+ GLboolean SupportsBufferUploads;
/** Primitive restart state. */
bool PrimitiveRestart;
void _mesa_glthread_upload(struct gl_context *ctx, const void *data,
GLsizeiptr size, unsigned *out_offset,
struct gl_buffer_object **out_buffer,
- uint8_t **out_ptr);
+ uint8_t **out_ptr,
+ unsigned start_offset);
void _mesa_glthread_reset_vao(struct glthread_vao *vao);
void _mesa_error_glthread_safe(struct gl_context *ctx, GLenum error,
bool glthread, const char *format, ...);
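The new trailing start_offset parameter changes how callers place vertex data inside the upload buffer. The sketch below shows the intended call pattern; upload_attrib_example, user_ptr, start, size, and signed_offsets_ok are illustrative names, and only _mesa_glthread_upload and its argument order come from this patch:

/* Illustrative caller of the new parameter (not part of the patch). */
static void
upload_attrib_example(struct gl_context *ctx, const void *user_ptr,
                      unsigned start, unsigned size, bool signed_offsets_ok)
{
   struct gl_buffer_object *upload_buffer = NULL;
   unsigned upload_offset = 0;

   /* Copy `size` bytes beginning at user_ptr + start. When signed vertex
    * buffer offsets are unavailable, ask for the data to land `start` bytes
    * into the destination so the binding offset below stays non-negative. */
   _mesa_glthread_upload(ctx, (const uint8_t *)user_ptr + start, size,
                         &upload_offset, &upload_buffer, NULL,
                         signed_offsets_ok ? 0 : start);
   if (!upload_buffer)
      return; /* allocation failed; the caller would fall back to a sync */

   /* upload_offset already includes start_offset, so this is >= 0 whenever
    * start_offset == start, and may be negative (requiring signed vertex
    * buffer offsets) when start_offset == 0. */
   GLintptr binding_offset = (GLintptr)upload_offset - (GLintptr)start;
   (void)binding_offset;
}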
_mesa_glthread_upload(struct gl_context *ctx, const void *data,
GLsizeiptr size, unsigned *out_offset,
struct gl_buffer_object **out_buffer,
- uint8_t **out_ptr)
+ uint8_t **out_ptr,
+ unsigned start_offset)
{
struct glthread_state *glthread = &ctx->GLThread;
const unsigned default_size = 1024 * 1024;
return;
/* The alignment was chosen arbitrarily. */
- unsigned offset = align(glthread->upload_offset, 8);
+ unsigned offset = align(glthread->upload_offset, 8) + start_offset;
/* Allocate a new buffer if needed. */
if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
/* If the size is greater than the buffer size, allocate a separate buffer
* just for this upload.
*/
- if (unlikely(size > default_size)) {
+ if (unlikely(start_offset + size > default_size)) {
uint8_t *ptr;
assert(*out_buffer == NULL);
- *out_buffer = new_upload_buffer(ctx, size, &ptr);
+ *out_buffer = new_upload_buffer(ctx, start_offset + size, &ptr);
if (!*out_buffer)
return;
- *out_offset = 0;
+ ptr += start_offset;
+ *out_offset = start_offset;
if (data)
memcpy(ptr, data, size);
else
*out_ptr = ptr;
return;
}
glthread->upload_buffer =
new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
glthread->upload_offset = 0;
- offset = 0;
+ offset = start_offset;
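A quick recap of the two allocation branches above, as a worked trace; the byte counts are made up, and the memcpy and the assignment of the returned offset in the shared-buffer path fall partly outside this excerpt:

/* Illustrative trace (made-up byte counts, not part of the patch):
 *
 * - Dedicated-buffer path (start_offset + size > default_size): the buffer is
 *   allocated with room for start_offset + size bytes, ptr is advanced by
 *   start_offset, and the caller gets *out_offset = start_offset.
 *
 * - Shared-buffer path: offset = align(upload_offset, 8) + start_offset, e.g.
 *   upload_offset = 10 and start_offset = 1600 give 16 + 1600 = 1616, which
 *   is where the copy lands inside the 1 MiB upload buffer.
 */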
/* Since atomic operations are very very slow when 2 threads are not
* sharing one L3 cache (which can happen on AMD Zen), prevent using
unsigned upload_offset = 0;
_mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
- NULL);
+ NULL, 0);
if (upload_buffer) {
_mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
assert(count);
_mesa_glthread_upload(ctx, *indices, index_size * count,
- &upload_offset, &upload_buffer, NULL);
+ &upload_offset, &upload_buffer, NULL, 0);
assert(upload_buffer);
*indices = (const GLvoid*)(intptr_t)upload_offset;
assert(total_count);
_mesa_glthread_upload(ctx, NULL, index_size * total_count,
- &upload_offset, &upload_buffer, &upload_ptr);
+ &upload_offset, &upload_buffer, &upload_ptr, 0);
assert(upload_buffer);
for (unsigned i = 0, offset = 0; i < draw_count; i++) {
end = end_offset[binding_index];
assert(start < end);
+ /* If the draw start index is non-zero, glthread can upload to offset 0,
+ * which means the attrib offset has to be -(first * stride).
+ * So use signed vertex buffer offsets when possible to save memory.
+ */
const void *ptr = vao->Attrib[binding_index].Pointer;
_mesa_glthread_upload(ctx, (uint8_t*)ptr + start,
end - start, &upload_offset,
- &upload_buffer, NULL);
+ &upload_buffer, NULL,
+ ctx->Const.VertexBufferOffsetIsInt32 ? 0 : start);
assert(upload_buffer);
buffers[num_buffers].buffer = upload_buffer;
size = stride * (num_vertices - 1) + element_size;
}
+ /* If the draw start index is non-zero, glthread can upload to offset 0,
+ * which means the attrib offset has to be -(first * stride).
+ * So use signed vertex buffer offsets when possible to save memory.
+ */
const void *ptr = vao->Attrib[binding_index].Pointer;
_mesa_glthread_upload(ctx, (uint8_t*)ptr + offset,
- size, &upload_offset, &upload_buffer, NULL);
+ size, &upload_offset, &upload_buffer, NULL,
+ ctx->Const.VertexBufferOffsetIsInt32 ? 0 : offset);
assert(upload_buffer);
buffers[num_buffers].buffer = upload_buffer;
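A worked example of the trade-off described in the two new comments above. The concrete numbers are assumptions chosen for illustration, and the binding-offset formula (upload offset minus first * stride) is inferred from those comments rather than shown in this excerpt:

#include <stdio.h>

/* Illustrative numbers only: first, stride, and the upload position are not
 * taken from the patch. */
int main(void)
{
   const int first = 100, stride = 16;
   const int start = first * stride;    /* 1600: where the draw starts reading */
   const unsigned aligned_base = 8;     /* assumed align(upload_offset, 8) */

   /* Signed vertex buffer offsets available: upload with start_offset = 0.
    * The copied data begins at aligned_base, so the binding offset that makes
    * vertex `first` line up is aligned_base - start, which is negative. */
   printf("signed offsets:   binding offset = %d\n", (int)aligned_base - start);

   /* Unsigned offsets only: upload with start_offset = start. The returned
    * offset becomes aligned_base + start, the binding offset stays >= 0, and
    * `start` bytes in front of the copy are wasted. */
   printf("unsigned offsets: binding offset = %d\n",
          (int)(aligned_base + start) - start);
   return 0;
}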
/* Upload and draw. */
struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX];
- if (!ctx->GLThread.SupportsNonVBOUploads ||
+ if (!ctx->GLThread.SupportsBufferUploads ||
!upload_vertices(ctx, user_buffer_mask, first, count, baseinstance,
instance_count, buffers)) {
_mesa_glthread_finish_before(ctx, "DrawArrays");
}
/* If the draw count is too high or negative, the queue can't be used. */
- if (!ctx->GLThread.SupportsNonVBOUploads ||
+ if (!ctx->GLThread.SupportsBufferUploads ||
draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 16)
goto sync;
return;
}
- if (!ctx->GLThread.SupportsNonVBOUploads)
+ if (!ctx->GLThread.SupportsBufferUploads)
goto sync;
bool need_index_bounds = user_buffer_mask & ~vao->NonZeroDivisorMask;
/* Sync if indices come from a buffer and vertices come from memory
* and index bounds are not valid. We would have to map the indices
* to compute the index bounds, and for that we would have to sync anyway.
*/
- if (!ctx->GLThread.SupportsNonVBOUploads ||
+ if (!ctx->GLThread.SupportsBufferUploads ||
draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 32 ||
(need_index_bounds && !has_user_indices))
goto sync;
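The same three sync conditions, restated as a standalone predicate for readability. The helper name is illustrative and not part of the patch, and the reading of MARSHAL_MAX_CMD_SIZE / 32 as "the per-draw data must fit in one marshalled command" is an inference from the constant's name:

/* Hypothetical restatement of the guard above (not part of the patch). */
static bool
must_sync_multi_draw_elements(const struct gl_context *ctx, GLsizei draw_count,
                              bool need_index_bounds, bool has_user_indices)
{
   return !ctx->GLThread.SupportsBufferUploads ||   /* no async buffer uploads */
          draw_count < 0 ||                         /* invalid draw count */
          draw_count > MARSHAL_MAX_CMD_SIZE / 32 || /* too large for one command */
          (need_index_bounds && !has_user_indices); /* computing bounds syncs anyway */
}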