From a371bac3fba73f92aaa63a68d8ab1ae81a1d1031 Mon Sep 17 00:00:00 2001 From: Gunnar Sletta Date: Tue, 6 Jan 2015 08:40:21 +0100 Subject: [PATCH] Reuse the memory we use for vertex uploads. Every batch would have its own buffer with its own memory and the original idea was that it would stabilize so that the same batch would be used over and over, so the memory allocated for each batch's buffer would be reused. In practice batches get discarded and recycled in somewhat arbitrary order so we always end up reallocating. Instead, use a single buffer for all uploads. This saves us several reallocations per frame, and also prevents pooling of several potentially large allocated blocks inside the renderer. The exception is when using visualization or for drivers which have broken index buffer support (nouveau only currently), where we keep the existing behavior as the buffer data is needed during rendering and visualization. Change-Id: Id29095e00128ec1ee6d4ac3caa8f2c17cc7bbd27 Reviewed-by: Robin Burchell --- src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp | 68 +++++++++++++++++++---- src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h | 8 ++- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp b/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp index 5a78b90..a008552 100644 --- a/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp +++ b/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp @@ -760,6 +760,10 @@ Renderer::Renderer(QSGRenderContext *ctx) , m_clipMatrixId(0) , m_currentClip(0) , m_currentClipType(NoClip) + , m_vertexUploadPool(256) +#ifdef QSG_SEPARATE_INDEX_BUFFER + , m_indexUploadPool(64) +#endif , m_vao(0) , m_visualizeMode(VisualizeNothing) { @@ -817,6 +821,11 @@ Renderer::Renderer(QSGRenderContext *ctx) static void qsg_wipeBuffer(Buffer *buffer, QOpenGLFunctions *funcs) { funcs->glDeleteBuffers(1, &buffer->id); + // The free here is ok because we're in one of two situations. + // 1. 
We're using the upload pool in which case unmap will have set the + // data pointer to 0 and calling free on 0 is ok. + // 2. We're using dedicated buffers because of visualization or IBO workaround + // and the data is something we malloced and must be freed. free(buffer->data); } @@ -870,14 +879,26 @@ void Renderer::invalidateAndRecycleBatch(Batch *b) * * ref: http://www.opengl.org/wiki/Buffer_Object */ -void Renderer::map(Buffer *buffer, int byteSize) +void Renderer::map(Buffer *buffer, int byteSize, bool isIndexBuf) { - if (buffer->size != byteSize) { - if (buffer->data) - free(buffer->data); + if (!m_context->hasBrokenIndexBufferObjects() && m_visualizeMode == VisualizeNothing) { + // Common case, use a shared memory pool for uploading vertex data to avoid + // excessive reallocation + QDataBuffer<char> &pool = +#ifdef QSG_SEPARATE_INDEX_BUFFER + isIndexBuf ? m_indexUploadPool : m_vertexUploadPool; +#else + m_vertexUploadPool; + Q_UNUSED(isIndexBuf); +#endif + if (byteSize > pool.size()) + pool.resize(byteSize); + buffer->data = pool.data(); + } else { buffer->data = (char *) malloc(byteSize); - buffer->size = byteSize; } + buffer->size = byteSize; + } void Renderer::unmap(Buffer *buffer, bool isIndexBuf) @@ -887,6 +908,10 @@ void Renderer::unmap(Buffer *buffer, bool isIndexBuf) GLenum target = isIndexBuf ? 
GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER; glBindBuffer(target, buffer->id); glBufferData(target, buffer->size, buffer->data, m_bufferStrategy); + + if (!m_context->hasBrokenIndexBufferObjects() && m_visualizeMode == VisualizeNothing) { + buffer->data = 0; + } } BatchRootInfo *Renderer::batchRootInfo(Node *node) @@ -1790,7 +1815,7 @@ void Renderer::uploadBatch(Batch *b) } #ifdef QSG_SEPARATE_INDEX_BUFFER - map(&b->ibo, ibufferSize); + map(&b->ibo, ibufferSize, true); #else bufferSize += ibufferSize; #endif @@ -2581,14 +2606,37 @@ void Renderer::render() m_zRange = 1.0 / (m_nextRenderOrder); } + int largestVBO = 0; +#ifdef QSG_SEPARATE_INDEX_BUFFER + int largestIBO = 0; +#endif if (Q_UNLIKELY(debug_upload())) qDebug() << "Uploading Opaque Batches:"; - for (int i=0; i<m_opaqueBatches.size(); ++i) - uploadBatch(m_opaqueBatches.at(i)); + for (int i=0; i<m_opaqueBatches.size(); ++i) { + Batch *b = m_opaqueBatches.at(i); + largestVBO = qMax(b->vbo.size, largestVBO); +#ifdef QSG_SEPARATE_INDEX_BUFFER + largestIBO = qMax(b->ibo.size, largestIBO); +#endif + uploadBatch(b); + } if (Q_UNLIKELY(debug_upload())) qDebug() << "Uploading Alpha Batches:"; - for (int i=0; i<m_alphaBatches.size(); ++i) - uploadBatch(m_alphaBatches.at(i)); + for (int i=0; i<m_alphaBatches.size(); ++i) { + Batch *b = m_alphaBatches.at(i); + uploadBatch(b); + largestVBO = qMax(b->vbo.size, largestVBO); +#ifdef QSG_SEPARATE_INDEX_BUFFER + largestIBO = qMax(b->ibo.size, largestIBO); +#endif + } + + if (largestVBO * 2 < m_vertexUploadPool.size()) + m_vertexUploadPool.resize(largestVBO * 2); +#ifdef QSG_SEPARATE_INDEX_BUFFER + if (largestIBO * 2 < m_indexUploadPool.size()) + m_indexUploadPool.resize(largestIBO * 2); +#endif renderBatches(); diff --git a/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h b/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h index 29604cf..50bea04 100644 --- a/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h +++ b/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h @@ -255,6 +255,8 @@ inline QDebug operator << (QDebug d, const Rect &r) { struct Buffer { GLuint id; int size; + // Data is only valid while preparing the upload. Exception is if we are using the + // broken IBO workaround or we are using a visualization mode. 
char *data; }; @@ -549,7 +551,7 @@ private: friend class Updater; - void map(Buffer *buffer, int size); + void map(Buffer *buffer, int size, bool isIndexBuf = false); void unmap(Buffer *buffer, bool isIndexBuf = false); void buildRenderListsFromScratch(); @@ -640,6 +642,10 @@ private: const QSGClipNode *m_currentClip; ClipType m_currentClipType; + QDataBuffer<char> m_vertexUploadPool; +#ifdef QSG_SEPARATE_INDEX_BUFFER + QDataBuffer<char> m_indexUploadPool; +#endif // For minimal OpenGL core profile support QOpenGLVertexArrayObject *m_vao; -- 2.7.4