From a371bac3fba73f92aaa63a68d8ab1ae81a1d1031 Mon Sep 17 00:00:00 2001
From: Gunnar Sletta <gunnar@sletta.org>
Date: Tue, 6 Jan 2015 08:40:21 +0100
Subject: [PATCH] Reuse the memory we use for vertex uploads.

Every batch would have its own buffer with its own memory
and the original idea was that it would stabilize so that
the same batch would be used over and over, so the memory
allocated for each batch's buffer would be reused. In practice
batches get discarded and recycled in somewhat arbitrary
order so we always end up reallocating.

Instead, use a single buffer for all uploads. This saves us several
reallocations per frame, and also prevents pooling of several
potentially large allocated blocks inside the renderer.

The exception is when using visualization or for drivers
which have broken index buffer support (nouveau only currently),
where we keep the existing behavior as the buffer data is
needed during rendering and visualization.

Change-Id: Id29095e00128ec1ee6d4ac3caa8f2c17cc7bbd27
Reviewed-by: Robin Burchell <robin.burchell@viroteck.net>
---
 src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp | 68 +++++++++++++++++++----
 src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h |  8 ++-
 2 files changed, 65 insertions(+), 11 deletions(-)
diff --git a/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp b/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp
index 5a78b90..a008552 100644
--- a/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp
+++ b/src/quick/scenegraph/coreapi/qsgbatchrenderer.cpp
@@ -760,6 +760,10 @@ Renderer::Renderer(QSGRenderContext *ctx)
     , m_clipMatrixId(0)
     , m_currentClip(0)
     , m_currentClipType(NoClip)
+    , m_vertexUploadPool(256)
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+    , m_indexUploadPool(64)
+#endif
     , m_vao(0)
     , m_visualizeMode(VisualizeNothing)
 {
@@ -817,6 +821,11 @@ Renderer::Renderer(QSGRenderContext *ctx)
 static void qsg_wipeBuffer(Buffer *buffer, QOpenGLFunctions *funcs)
 {
     funcs->glDeleteBuffers(1, &buffer->id);
+    // The free here is ok because we're in one of two situations.
+    // 1. We're using the upload pool in which case unmap will have set the
+    //    data pointer to 0 and calling free on 0 is ok.
+    // 2. We're using dedicated buffers because of visualization or IBO workaround
+    //    and the data is something we malloced and must be freed.
     free(buffer->data);
 }
 
@@ -870,14 +879,26 @@ void Renderer::invalidateAndRecycleBatch(Batch *b)
  *
  * ref: http://www.opengl.org/wiki/Buffer_Object
  */
-void Renderer::map(Buffer *buffer, int byteSize)
+void Renderer::map(Buffer *buffer, int byteSize, bool isIndexBuf)
 {
-    if (buffer->size != byteSize) {
-        if (buffer->data)
-            free(buffer->data);
+    if (!m_context->hasBrokenIndexBufferObjects() && m_visualizeMode == VisualizeNothing) {
+        // Common case, use a shared memory pool for uploading vertex data to avoid
+        // excessive reallocation
+        QDataBuffer<char> &pool =
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+                isIndexBuf ? m_indexUploadPool : m_vertexUploadPool;
+#else
+                m_vertexUploadPool;
+        Q_UNUSED(isIndexBuf);
+#endif
+        if (byteSize > pool.size())
+            pool.resize(byteSize);
+        buffer->data = pool.data();
+    } else {
         buffer->data = (char *) malloc(byteSize);
-        buffer->size = byteSize;
     }
+    buffer->size = byteSize;
+
 }
 
 void Renderer::unmap(Buffer *buffer, bool isIndexBuf)
@@ -887,6 +908,10 @@ void Renderer::unmap(Buffer *buffer, bool isIndexBuf)
     GLenum target = isIndexBuf ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER;
     glBindBuffer(target, buffer->id);
     glBufferData(target, buffer->size, buffer->data, m_bufferStrategy);
+
+    if (!m_context->hasBrokenIndexBufferObjects() && m_visualizeMode == VisualizeNothing) {
+        buffer->data = 0;
+    }
 }
 
 BatchRootInfo *Renderer::batchRootInfo(Node *node)
@@ -1790,7 +1815,7 @@ void Renderer::uploadBatch(Batch *b)
         }
 
 #ifdef QSG_SEPARATE_INDEX_BUFFER
-        map(&b->ibo, ibufferSize);
+        map(&b->ibo, ibufferSize, true);
 #else
         bufferSize += ibufferSize;
 #endif
@@ -2581,14 +2606,37 @@ void Renderer::render()
         m_zRange = 1.0 / (m_nextRenderOrder);
     }
 
+    int largestVBO = 0;
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+    int largestIBO = 0;
+#endif
 
     if (Q_UNLIKELY(debug_upload())) qDebug() << "Uploading Opaque Batches:";
-    for (int i=0; i<m_opaqueBatches.size(); ++i)
-        uploadBatch(m_opaqueBatches.at(i));
+    for (int i=0; i<m_opaqueBatches.size(); ++i) {
+        Batch *b = m_opaqueBatches.at(i);
+        largestVBO = qMax(b->vbo.size, largestVBO);
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+        largestIBO = qMax(b->ibo.size, largestIBO);
+#endif
+        uploadBatch(b);
+    }
 
     if (Q_UNLIKELY(debug_upload())) qDebug() << "Uploading Alpha Batches:";
-    for (int i=0; i<m_alphaBatches.size(); ++i)
-        uploadBatch(m_alphaBatches.at(i));
+    for (int i=0; i<m_alphaBatches.size(); ++i) {
+        Batch *b = m_alphaBatches.at(i);
+        uploadBatch(b);
+        largestVBO = qMax(b->vbo.size, largestVBO);
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+        largestIBO = qMax(b->ibo.size, largestIBO);
+#endif
+    }
+
+    if (largestVBO * 2 < m_vertexUploadPool.size())
+        m_vertexUploadPool.resize(largestVBO * 2);
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+    if (largestIBO * 2 < m_indexUploadPool.size())
+        m_indexUploadPool.resize(largestIBO * 2);
+#endif
 
     renderBatches();
 
diff --git a/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h b/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h
index 29604cf..50bea04 100644
--- a/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h
+++ b/src/quick/scenegraph/coreapi/qsgbatchrenderer_p.h
@@ -255,6 +255,8 @@ inline QDebug operator << (QDebug d, const Rect &r) {
 struct Buffer {
     GLuint id;
     int size;
+    // Data is only valid while preparing the upload. Exception is if we are using the
+    // broken IBO workaround or we are using a visualization mode.
     char *data;
 };
 
@@ -549,7 +551,7 @@ private:
 
     friend class Updater;
 
-    void map(Buffer *buffer, int size);
+    void map(Buffer *buffer, int size, bool isIndexBuf = false);
     void unmap(Buffer *buffer, bool isIndexBuf = false);
 
     void buildRenderListsFromScratch();
@@ -640,6 +642,10 @@ private:
     const QSGClipNode *m_currentClip;
     ClipType m_currentClipType;
 
+    QDataBuffer<char> m_vertexUploadPool;
+#ifdef QSG_SEPARATE_INDEX_BUFFER
+    QDataBuffer<char> m_indexUploadPool;
+#endif
     // For minimal OpenGL core profile support
     QOpenGLVertexArrayObject *m_vao;
 
-- 
2.7.4