From 3cb96b18f64b0db0d68d917247710417de0b982d Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 21 Aug 2022 23:49:14 -0400
Subject: [PATCH] glthread: rewrite CallList merging and do it in the app
 thread

This looks simpler and hopefully faster, but it may just move the overhead
to the app thread.

Acked-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18199>
---
 src/mesa/main/glthread.c         |  6 +++
 src/mesa/main/glthread.h         |  3 ++
 src/mesa/main/glthread_list.c    | 92 ++++++++++++++++++++--------------------
 src/mesa/main/glthread_marshal.h |  6 ---
 4 files changed, 56 insertions(+), 51 deletions(-)

diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index b737d97..c4bdbaa 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -244,6 +244,8 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
    if (false) {
       glthread_unmarshal_batch(next, NULL, 0);
       _glapi_set_dispatch(ctx->CurrentClientDispatch);
+
+      glthread->LastCallList = NULL;
       return;
    }
 
@@ -256,6 +258,8 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
    glthread->next = (glthread->next + 1) % MARSHAL_MAX_BATCHES;
    glthread->next_batch = &glthread->batches[glthread->next];
    glthread->used = 0;
+
+   glthread->LastCallList = NULL;
 }
 
 /**
@@ -293,6 +297,8 @@ _mesa_glthread_finish(struct gl_context *ctx)
       next->used = glthread->used;
       glthread->used = 0;
 
+      glthread->LastCallList = NULL;
+
       /* Since glthread_unmarshal_batch changes the dispatch to direct,
        * restore it after it's done.
        */
diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
index e0f9251..c94f63b 100644
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -240,6 +240,9 @@ struct glthread_state
    GLuint CurrentDrawFramebuffer;
    GLuint CurrentReadFramebuffer;
    GLuint CurrentProgram;
+
+   /** Most recent CallList command in the current batch, used to merge consecutive CallList calls; NULL after a flush/finish. */
+   struct marshal_cmd_CallList *LastCallList;
 };
 
 void _mesa_glthread_init(struct gl_context *ctx);
diff --git a/src/mesa/main/glthread_list.c b/src/mesa/main/glthread_list.c
index 0eab38f..32c74ff 100644
--- a/src/mesa/main/glthread_list.c
+++ b/src/mesa/main/glthread_list.c
@@ -26,66 +26,68 @@
 #include "main/glthread_marshal.h"
 #include "main/dispatch.h"
 
+struct marshal_cmd_CallList /* one CallList command, or several merged CallList calls */
+{
+   struct marshal_cmd_base cmd_base;
+   GLuint num;    /* the single list ID while cmd_size == sizeof(*cmd) / 8; otherwise the number of IDs in list[] */
+   GLuint list[]; /* list IDs passed to CallLists; only used once cmd_size has grown past the header */
+};
+
 uint32_t
 _mesa_unmarshal_CallList(struct gl_context *ctx, const struct marshal_cmd_CallList *cmd, const uint64_t *last)
 {
-   const GLuint list = cmd->list;
-   uint64_t *ptr = (uint64_t *) cmd;
-   const unsigned cmd_size = align(sizeof(*cmd), 8) / 8;
-
-   assert(cmd_size == cmd->cmd_base.cmd_size);
-   ptr += cmd_size;
+   const GLuint num = cmd->num;
 
-   if (ptr < last) {
-      const struct marshal_cmd_base *next =
-         (const struct marshal_cmd_base *)ptr;
+   if (cmd->cmd_base.cmd_size == sizeof(*cmd) / 8) {
+      CALL_CallList(ctx->CurrentServerDispatch, (num));
+   } else {
+      CALL_CallLists(ctx->CurrentServerDispatch, (num, GL_UNSIGNED_INT, cmd->list));
+   }
 
-      /* If the 'next' is also a DISPATCH_CMD_CallList, we transform 'cmd' and 'next' in a CALL_CallLists.
-       * If the following commands are also CallList they're including in the CallLists we're building.
-       */
-      if (next->cmd_id == DISPATCH_CMD_CallList) {
-         const int max_list_count = 2048;
-         struct marshal_cmd_CallList *next_callist = (struct marshal_cmd_CallList *) next;
-         uint32_t *lists = alloca(max_list_count * sizeof(uint32_t));
+   return cmd->cmd_base.cmd_size;
+}
 
-         lists[0] = cmd->list;
-         lists[1] = next_callist->list;
+void GLAPIENTRY
+_mesa_marshal_CallList(GLuint list)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct glthread_state *glthread = &ctx->GLThread;
+   struct marshal_cmd_CallList *last = glthread->LastCallList;
 
-         int count = 2;
+   _mesa_glthread_CallList(ctx, list);
 
-         assert(cmd_size == next_callist->cmd_base.cmd_size);
-         ptr += cmd_size;
+   /* If the last call is CallList and there is enough space to append another list... */
+   if (last &&
+       (uint64_t*)last + last->cmd_base.cmd_size ==
+       &glthread->next_batch->buffer[glthread->used] &&
+       glthread->used + 1 <= MARSHAL_MAX_CMD_SIZE / 8) {
+      STATIC_ASSERT(sizeof(*last) == 8);
 
-         while (ptr < last && count < max_list_count) {
-            next = (const struct marshal_cmd_base *)ptr;
-            if (next->cmd_id == DISPATCH_CMD_CallList) {
-               next_callist = (struct marshal_cmd_CallList *) next;
-               lists[count++] = next_callist->list;
-               assert(cmd_size == next_callist->cmd_base.cmd_size);
-               ptr += cmd_size;
-            } else {
-               break;
-            }
+      /* Add the list to the last call. */
+      if (last->cmd_base.cmd_size > sizeof(*last) / 8) {
+         last->list[last->num++] = list;
+         if (last->num % 2 == 1) {
+            last->cmd_base.cmd_size++;
+            glthread->used++;
          }
-
-         CALL_CallLists(ctx->CurrentServerDispatch, (count, GL_UNSIGNED_INT, lists));
-
-         return (uint32_t) (ptr - (uint64_t*)cmd);
+      } else {
+         /* Initially, num contains the first list. After we increase cmd_size,
+          * num contains the number of lists and list[] contains the lists.
+          */
+         last->list[0] = last->num;
+         last->list[1] = list;
+         last->num = 2;
+         last->cmd_base.cmd_size++;
+         glthread->used++;
       }
+      assert(align(sizeof(*last) + last->num * 4, 8) / 8 == last->cmd_base.cmd_size);
+      return;
    }
 
-   CALL_CallList(ctx->CurrentServerDispatch, (list));
-   return cmd_size;
-}
-
-void GLAPIENTRY
-_mesa_marshal_CallList(GLuint list)
-{
-   GET_CURRENT_CONTEXT(ctx);
    int cmd_size = sizeof(struct marshal_cmd_CallList);
    struct marshal_cmd_CallList *cmd;
    cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_CallList, cmd_size);
-   cmd->list = list;
+   cmd->num = list;
 
-   _mesa_glthread_CallList(ctx, list);
+   glthread->LastCallList = cmd;
 }
diff --git a/src/mesa/main/glthread_marshal.h b/src/mesa/main/glthread_marshal.h
index 8d4d3a5..2b3571f 100644
--- a/src/mesa/main/glthread_marshal.h
+++ b/src/mesa/main/glthread_marshal.h
@@ -865,10 +865,4 @@ _mesa_glthread_DeleteFramebuffers(struct gl_context *ctx, GLsizei n,
    }
 }
 
-struct marshal_cmd_CallList
-{
-   struct marshal_cmd_base cmd_base;
-   GLuint list;
-};
-
 #endif /* MARSHAL_H */
-- 
2.7.4