From 04856a3828411fe23fc043787b7687c0416e51fc Mon Sep 17 00:00:00 2001 From: Matthew Waters Date: Fri, 4 Jun 2021 18:32:07 +1000 Subject: [PATCH] glbuffer: support persistent buffer mappings Requires OpenGL 4.4 or EXT_buffer_storage Current mesa exposes GL_ARB_buffer_storage when retrieving the relevant functions returns no-ops and causes failures. Improves throughput of uploads by roughly 30%-60% and download throughput by roughly 10-30% across depending on the exact scenario and hardware. Part-of: --- gst-libs/gst/gl/glprototypes/all_functions.h | 1 + gst-libs/gst/gl/glprototypes/buffer_storage.h | 42 +++++++++++ gst-libs/gst/gl/gstglbuffer.c | 100 ++++++++++++++++++++++++-- gst-libs/gst/gl/gstglfuncs.h | 2 +- gst-libs/gst/gl/gstglmemorypbo.c | 49 +++++++++---- gst-libs/gst/gl/meson.build | 1 + 6 files changed, 177 insertions(+), 18 deletions(-) create mode 100644 gst-libs/gst/gl/glprototypes/buffer_storage.h diff --git a/gst-libs/gst/gl/glprototypes/all_functions.h b/gst-libs/gst/gl/glprototypes/all_functions.h index 9f7e4f1..48007e9 100644 --- a/gst-libs/gst/gl/glprototypes/all_functions.h +++ b/gst-libs/gst/gl/glprototypes/all_functions.h @@ -31,3 +31,4 @@ #include "sync.h" #include "buffers.h" #include "query.h" +#include "buffer_storage.h" diff --git a/gst-libs/gst/gl/glprototypes/buffer_storage.h b/gst-libs/gst/gl/glprototypes/buffer_storage.h new file mode 100644 index 0000000..be31bee --- /dev/null +++ b/gst-libs/gst/gl/glprototypes/buffer_storage.h @@ -0,0 +1,42 @@ +/* + * GStreamer + * Copyright (C) 2021 Matthew Waters + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +GST_GL_EXT_BEGIN (buffer_storage, + GST_GL_API_OPENGL | GST_GL_API_OPENGL3 | + GST_GL_API_GLES2, + 4, 4, + 255, 255, + "EXT\0", /* XXX: GL_ARB_buffer_storage advertisement seems to be broken in Mesa */ + "buffer_storage\0") +GST_GL_EXT_FUNCTION (void, BufferStorage, + (GLenum target, GLsizeiptr, const void * data, GLbitfield flags)) +GST_GL_EXT_END () + +GST_GL_EXT_BEGIN (flush_mapped, + GST_GL_API_OPENGL | GST_GL_API_OPENGL3 | + GST_GL_API_GLES2, + 3, 0, + 3, 0, + "\0", + "\0") +GST_GL_EXT_FUNCTION (void, FlushMappedBufferRange, + (GLenum target, GLintptr offset, GLsizeiptr length)) +GST_GL_EXT_END () + diff --git a/gst-libs/gst/gl/gstglbuffer.c b/gst-libs/gst/gl/gstglbuffer.c index 5a5f912..f60c710 100644 --- a/gst-libs/gst/gl/gstglbuffer.c +++ b/gst-libs/gst/gl/gstglbuffer.c @@ -53,6 +53,8 @@ #define USING_GLES2(context) (gst_gl_context_check_gl_version (context, GST_GL_API_GLES2, 2, 0)) #define USING_GLES3(context) (gst_gl_context_check_gl_version (context, GST_GL_API_GLES2, 3, 0)) +#define HAVE_BUFFER_STORAGE(context) (context->gl_vtable->BufferStorage != NULL) + /* compatibility definitions... */ #ifndef GL_MAP_READ_BIT #define GL_MAP_READ_BIT 0x0001 @@ -60,6 +62,18 @@ #ifndef GL_MAP_WRITE_BIT #define GL_MAP_WRITE_BIT 0x0002 #endif +#ifndef GL_MAP_FLUSH_EXPLICIT_BIT +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#endif +#ifndef GL_MAP_PERSISTENT_BIT +#define GL_MAP_PERSISTENT_BIT 0x0040 +#endif +#ifndef GL_DYNAMIC_STORAGE_BIT +#define GL_DYNAMIC_STORAGE_BIT 0x0100 +#endif +#ifndef GL_CLIENT_STORAGE_BIT +#define GL_CLIENT_STORAGE_BIT 0x0200 +#endif #ifndef GL_COPY_READ_BUFFER #define GL_COPY_READ_BUFFER 0x8F36 #endif @@ -81,8 +95,31 @@ _gl_buffer_create (GstGLBuffer * gl_mem, GError ** error) gl->GenBuffers (1, &gl_mem->id); gl->BindBuffer (gl_mem->target, gl_mem->id); - gl->BufferData (gl_mem->target, gl_mem->mem.mem.maxsize, NULL, - gl_mem->usage_hints); + if (HAVE_BUFFER_STORAGE (gl_mem->mem.context)) { + GLenum flags = 0; + + flags |= GL_MAP_READ_BIT | GL_MAP_WRITE_BIT; + /* allow access to GPU-side data while there are outstanding mappings */ + flags |= GL_MAP_PERSISTENT_BIT; + /* match the glBufferData() below and make this buffer mutable */ + flags |= GL_DYNAMIC_STORAGE_BIT; + /* hint that the data should be kept CPU-side. Fixes atrocious + * performance when e.g. libav decoders are direct-rendering into our + * data pointer */ + /* XXX: make this an fine-grained option. The current assumption here is + * that users signal GL_STREAM_DRAW for regularly updating video frames as + * is done by gstglmemorypbo */ + if (gl_mem->usage_hints == GL_STREAM_DRAW) { + GST_CAT_DEBUG (GST_CAT_GL_BUFFER, "using client-side storage for " + "buffer %p %u", gl_mem, gl_mem->id); + flags |= GL_CLIENT_STORAGE_BIT; + } + + gl->BufferStorage (gl_mem->target, gl_mem->mem.mem.maxsize, NULL, flags); + } else { + gl->BufferData (gl_mem->target, gl_mem->mem.mem.maxsize, NULL, + gl_mem->usage_hints); + } gl->BindBuffer (gl_mem->target, 0); return TRUE; @@ -121,20 +158,50 @@ _gl_buffer_new (GstAllocator * allocator, GstMemory * parent, return ret; } +static GLenum +gst_map_flags_to_gl (GstMapFlags flags) +{ + GLenum ret = 0; + + if (flags & GST_MAP_READ) + ret |= GL_MAP_READ_BIT; + + if (flags & GST_MAP_WRITE) + ret |= GL_MAP_WRITE_BIT; + + return ret; +} + static gpointer gst_gl_buffer_cpu_access (GstGLBuffer * mem, GstMapInfo * info, gsize size) { const GstGLFuncs *gl = mem->mem.context->gl_vtable; gpointer data, ret; + GST_CAT_LOG (GST_CAT_GL_BUFFER, "mapping %p id %d size %" G_GSIZE_FORMAT, + mem, mem->id, size); + + if (HAVE_BUFFER_STORAGE (mem->mem.context)) { + GLenum gl_map_flags = gst_map_flags_to_gl (info->flags); + + gl_map_flags |= GL_MAP_PERSISTENT_BIT; + if (info->flags & GST_MAP_WRITE) + gl_map_flags |= GL_MAP_FLUSH_EXPLICIT_BIT; + + if (mem->mem.map_count == (mem->mem.gl_map_count + 1)) { + gl->BindBuffer (mem->target, mem->id); + mem->mem.data = gl->MapBufferRange (mem->target, 0, size, gl_map_flags); + gl->BindBuffer (mem->target, 0); + } + + return mem->mem.data; + } + if (!gst_gl_base_memory_alloc_data (GST_GL_BASE_MEMORY_CAST (mem))) return NULL; ret = mem->mem.data; - GST_CAT_LOG (GST_CAT_GL_BUFFER, "mapping id %d size %" G_GSIZE_FORMAT, - mem->id, size); - /* The extra data pointer indirection/memcpy is needed for coherent across * concurrent map()'s in both GL and CPU */ if (GST_MEMORY_FLAG_IS_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD) @@ -177,6 +244,14 @@ gst_gl_buffer_upload_cpu_write (GstGLBuffer * mem, GstMapInfo * info, /* no data pointer has been written */ return; + GST_CAT_LOG (GST_CAT_GL_BUFFER, "mapping %p id %d size %" G_GSIZE_FORMAT, + mem, mem->id, size); + + if (HAVE_BUFFER_STORAGE (mem->mem.context)) { + /* flushing is done on unmap already */ + return; + } + /* The extra data pointer indirection/memcpy is needed for coherent across * concurrent map()'s in both GL and CPU */ /* FIXME: uploading potentially half-written data for libav pushing READWRITE @@ -227,6 +302,21 @@ _gl_buffer_unmap (GstGLBuffer * mem, GstMapInfo * info) { const GstGLFuncs *gl = mem->mem.context->gl_vtable; + GST_CAT_LOG (GST_CAT_GL_BUFFER, "unmapping %p id %d size %" G_GSIZE_FORMAT, + mem, mem->id, info->size); + + if (HAVE_BUFFER_STORAGE (mem->mem.context) && (info->flags & GST_MAP_GL) == 0) { + gl->BindBuffer (mem->target, mem->id); + + if (info->flags & GST_MAP_WRITE) + gl->FlushMappedBufferRange (mem->target, 0, info->maxsize); + + if (mem->mem.map_count == (mem->mem.gl_map_count + 1)) + /* if this is our last cpu-mapping, unmap the GL buffer */ + gl->UnmapBuffer (mem->target); + + gl->BindBuffer (mem->target, 0); + } if ((info->flags & GST_MAP_GL) != 0) { gl->BindBuffer (mem->target, 0); } diff --git a/gst-libs/gst/gl/gstglfuncs.h b/gst-libs/gst/gl/gstglfuncs.h index 0ec5c9f..81ba4d5 100644 --- a/gst-libs/gst/gl/gstglfuncs.h +++ b/gst-libs/gst/gl/gstglfuncs.h @@ -99,7 +99,7 @@ G_BEGIN_DECLS struct _GstGLFuncs { #include - gpointer padding[GST_PADDING_LARGE*6]; + gpointer padding[GST_PADDING_LARGE*6-2]; }; #undef GST_GL_EXT_BEGIN diff --git a/gst-libs/gst/gl/gstglmemorypbo.c b/gst-libs/gst/gl/gstglmemorypbo.c index fc156ba..13fdba3 100644 --- a/gst-libs/gst/gl/gstglmemorypbo.c +++ b/gst-libs/gst/gl/gstglmemorypbo.c @@ -197,6 +197,7 @@ _gl_mem_create (GstGLMemoryPBO * gl_mem, GError ** error) { 0, GST_MEMORY_CAST (gl_mem)->align, 0, 0 }; GstGLBaseMemoryAllocator *buf_allocator; GstGLBufferAllocationParams *params; + GstMapInfo pbo_map; buf_allocator = GST_GL_BASE_MEMORY_ALLOCATOR (gst_allocator_find @@ -211,9 +212,34 @@ _gl_mem_create (GstGLMemoryPBO * gl_mem, GError ** error) gl_mem->pbo = (GstGLBuffer *) gst_gl_base_memory_alloc (buf_allocator, (GstGLAllocationParams *) params); - gst_gl_allocation_params_free ((GstGLAllocationParams *) params); gst_object_unref (buf_allocator); + /* if we are wrapping an existing data pointer, and glbuffer is using + * GL_EXT/ARB_buffer_storage or OpenGL 4.4 then we need to copy into the GL + * provided data pointer. + */ + if (GST_MEMORY_FLAG_IS_SET (gl_mem, + GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD)) { + GST_MINI_OBJECT_FLAG_SET (gl_mem->pbo, + GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD); + gl_mem->pbo->mem.data = params->parent.wrapped_data; + + if (!gst_memory_map ((GstMemory *) gl_mem->pbo, &pbo_map, + GST_MAP_READWRITE)) { + gst_gl_allocation_params_free ((GstGLAllocationParams *) params); + GST_CAT_WARNING (GST_CAT_GL_MEMORY, "Failed to map pbo memory"); + return FALSE; + } + + if (pbo_map.data != params->parent.wrapped_data) { + memcpy (pbo_map.data, gl_mem->mem.mem.data, + GST_MEMORY_CAST (gl_mem)->size); + } + + gst_memory_unmap ((GstMemory *) gl_mem->pbo, &pbo_map); + } + gst_gl_allocation_params_free ((GstGLAllocationParams *) params); + GST_CAT_LOG (GST_CAT_GL_MEMORY, "generated pbo %u", gl_mem->pbo->id); } @@ -626,9 +652,12 @@ _gl_mem_pbo_alloc (GstGLBaseMemoryAllocator * allocator, GstGLVideoAllocationParams * params) { GstGLMemoryPBO *mem; + GstAllocationParams alloc_params = { 0, }; guint alloc_flags; alloc_flags = params->parent.alloc_flags; + if (params->parent.alloc_params) + alloc_params = *params->parent.alloc_params; g_return_val_if_fail (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_VIDEO, NULL); @@ -638,23 +667,19 @@ _gl_mem_pbo_alloc (GstGLBaseMemoryAllocator * allocator, if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_GPU_HANDLE) { mem->mem.tex_id = GPOINTER_TO_UINT (params->parent.gl_handle); mem->mem.texture_wrapped = TRUE; + alloc_params.flags |= GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD; + } + if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_SYSMEM) { + alloc_params.flags |= GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD; + mem->mem.mem.data = params->parent.wrapped_data; } gst_gl_memory_init (GST_GL_MEMORY_CAST (mem), GST_ALLOCATOR_CAST (allocator), NULL, params->parent.context, params->target, params->tex_format, - params->parent.alloc_params, params->v_info, params->plane, - params->valign, params->parent.user_data, params->parent.notify); + &alloc_params, params->v_info, params->plane, params->valign, + params->parent.user_data, params->parent.notify); - if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_GPU_HANDLE) { - GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD); - } if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_SYSMEM) { - GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD); - if (mem->pbo) { - GST_MINI_OBJECT_FLAG_SET (mem->pbo, - GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD); - mem->pbo->mem.data = params->parent.wrapped_data; - } mem->mem.mem.data = params->parent.wrapped_data; } diff --git a/gst-libs/gst/gl/meson.build b/gst-libs/gst/gl/meson.build index bfaaf1d..289074b 100644 --- a/gst-libs/gst/gl/meson.build +++ b/gst-libs/gst/gl/meson.build @@ -82,6 +82,7 @@ gl_headers = gir_gl_headers + [ gl_prototype_headers = [ 'glprototypes/all_functions.h', 'glprototypes/base.h', + 'glprototypes/buffer_storage.h', 'glprototypes/blending.h', 'glprototypes/buffers.h', 'glprototypes/debug.h', -- 2.7.4