freedreno: support growable cmdstream buffers
author Rob Clark <robclark@freedesktop.org>
Mon, 20 Jun 2016 18:06:24 +0000 (14:06 -0400)
committer Rob Clark <robclark@freedesktop.org>
Wed, 20 Jul 2016 23:42:21 +0000 (19:42 -0400)
The issue that userspace needed to solve is that cmdstream buffer sizes
vary by ~two orders of magnitude (both for gmem commands and for draw
commands), so the previous practice of allocating worst-case sizes is
quite wasteful.  Previously a submit would be constructed (for example)
like:

  CMD  TARGET  DESCRIPTION
   g0    N     gmem/tiling commands
   b0    Y     binning commands
   d0    Y     draw commands

Which, after the one non-IB-target cmd buffer is inserted into the
kernel controlled ringbuffer, looks like (not to scale):

         b0:           d0:
        +-----+       +-----+
   IB1  | ... |       | ... |
        +-----+       +-----+
         ^             ^
         |             |
         +-----+       +-+---------+
         g0:   |         |         |
        +----+----+----+----+----+----+----
   IB0  | .. | IB | .. | IB | .. | IB | ...
        +----+----+----+----+----+----+----
         ^              tile0     tile1
         |
         +-----------+
  userspace          |
  ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  kernel             |
               ----+----+----
   ringbuffer  ... | IB | ...
               ----+----+----

Now, multiple physical cmdstream buffers per fd_ringbuffer are supported,
so this becomes:

  CMD  TARGET  DESCRIPTION
   g0    N
   ...   N     gmem/tiling commands
   gN    N
   b0    Y
   ...   Y     binning commands
   bN    Y
   d0    Y
   ...   Y     draw commands
   dN    Y

Which, after the non-IB-target cmd buffers (g0..gN) are inserted into
the kernel controlled ringbuffer, looks like:

             b0:      b1            d0:      d1
            +-----+  +-----+        +-----+  +-----+
       IB1  | ... |  | ... | ...    | ... |  | ... | ...
            +-----+  +-----+        +-----+  +-----+
             ^        ^              ^        ^
             |        |              |        |
             |        +-+            |  +-----+------+
             +-----+    |            |  |            |
                   |    |         +--+----------+    |
             g0:   |    |         |     |       |    |
            +----+----+----+----+----+----+---+----+----+----
       IB0  | .. | IB | IB | .. | IB | IB |.. | IB | IB |...
            +----+----+----+----+----+----+---+----+----+----
             ^                   tile0         tile1
             | to b0  to b1
             |   |      |          to|d0    to|d1
             |   |      +----+       |      +-+-----------+
             |   |           |       |      |             |
             |   +------+    |       +-+-------------+    |
             |    g1:   |    |         |    |        |    |
             |   +----+----+----+----+----+----+---+----+----+----
       IB0   |   | .. | IB | IB | .. | IB | IB |.. | IB | IB |...
             |   +----+----+----+----+----+----+---+----+----+----
             |    ^                   tileX         tileY
             |    |
             |    +-----------+
             +-----------+    |
      userspace          |    |
      ~~~~~~~~~~~~~~~~~~~|~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      kernel             |    |
                   ----+----+----+----
       ringbuffer  ... | IB | IB | ...
                   ----+----+----+----

Signed-off-by: Rob Clark <robclark@freedesktop.org>
freedreno/freedreno_priv.h
freedreno/freedreno_ringbuffer.c
freedreno/freedreno_ringbuffer.h
freedreno/kgsl/kgsl_ringbuffer.c
freedreno/msm/msm_ringbuffer.c

index 9737b32..cdfdbe8 100644 (file)
@@ -134,12 +134,14 @@ struct fd_ringmarker {
 struct fd_ringbuffer_funcs {
        void * (*hostptr)(struct fd_ringbuffer *ring);
        int (*flush)(struct fd_ringbuffer *ring, uint32_t *last_start);
+       void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
        void (*reset)(struct fd_ringbuffer *ring);
        void (*emit_reloc)(struct fd_ringbuffer *ring,
                        const struct fd_reloc *reloc);
-       void (*emit_reloc_ring)(struct fd_ringbuffer *ring,
-                       struct fd_ringbuffer *target,
+       uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
+                       struct fd_ringbuffer *target, uint32_t cmd_idx,
                        uint32_t submit_offset, uint32_t size);
+       uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
        void (*destroy)(struct fd_ringbuffer *ring);
 };
 
index 34a06d8..22dafb3 100644 (file)
@@ -45,10 +45,9 @@ fd_ringbuffer_new(struct fd_pipe *pipe, uint32_t size)
        if (!ring)
                return NULL;
 
-       ring->size = size;
        ring->pipe = pipe;
        ring->start = ring->funcs->hostptr(ring);
-       ring->end = &(ring->start[size/4]);
+       ring->end = &(ring->start[ring->size/4]);
 
        ring->cur = ring->last_start = ring->start;
 
@@ -87,6 +86,22 @@ int fd_ringbuffer_flush(struct fd_ringbuffer *ring)
        return ring->funcs->flush(ring, ring->last_start);
 }
 
+void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
+{
+       assert(ring->funcs->grow);     /* unsupported on kgsl */
+
+       /* there is an upper bound on IB size, which appears to be 0x100000 */
+       if (ring->size < 0x100000)
+               ring->size *= 2;
+
+       ring->funcs->grow(ring, ring->size);
+
+       ring->start = ring->funcs->hostptr(ring);
+       ring->end = &(ring->start[ring->size/4]);
+
+       ring->cur = ring->last_start = ring->start;
+}
+
 uint32_t fd_ringbuffer_timestamp(struct fd_ringbuffer *ring)
 {
        return ring->last_timestamp;
@@ -108,7 +123,14 @@ void fd_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
        submit_offset = offset_bytes(target->cur, target->ring->start);
        size = offset_bytes(end->cur, target->cur);
 
-       ring->funcs->emit_reloc_ring(ring, target->ring, submit_offset, size);
+       ring->funcs->emit_reloc_ring(ring, target->ring, 0, submit_offset, size);
+}
+
+uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
+{
+       if (!ring->funcs->cmd_count)
+               return 1;
+       return ring->funcs->cmd_count(ring);
 }
 
 uint32_t
@@ -116,9 +138,7 @@ fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
                struct fd_ringbuffer *target, uint32_t cmd_idx)
 {
        uint32_t size = offset_bytes(target->cur, target->start);
-       assert(cmd_idx == 0);
-       ring->funcs->emit_reloc_ring(ring, target, 0, size);
-       return size;
+       return ring->funcs->emit_reloc_ring(ring, target, cmd_idx, 0, size);
 }
 
 struct fd_ringmarker * fd_ringmarker_new(struct fd_ringbuffer *ring)
index 643f50b..8899b5d 100644 (file)
@@ -56,6 +56,7 @@ void fd_ringbuffer_set_parent(struct fd_ringbuffer *ring,
                struct fd_ringbuffer *parent);
 void fd_ringbuffer_reset(struct fd_ringbuffer *ring);
 int fd_ringbuffer_flush(struct fd_ringbuffer *ring);
+void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords);
 uint32_t fd_ringbuffer_timestamp(struct fd_ringbuffer *ring);
 
 static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring,
@@ -77,6 +78,7 @@ struct fd_reloc {
 void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
 will_be_deprecated void fd_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
                struct fd_ringmarker *target, struct fd_ringmarker *end);
+uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring);
 uint32_t fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
                struct fd_ringbuffer *target, uint32_t cmd_idx);
 
index a0bc9d0..7b3298a 100644 (file)
@@ -173,12 +173,14 @@ static void kgsl_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
        kgsl_pipe_add_submit(to_kgsl_pipe(ring->pipe), kgsl_bo);
 }
 
-static void kgsl_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
-               struct fd_ringbuffer *target,
+static uint32_t kgsl_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
+               struct fd_ringbuffer *target, uint32_t cmd_idx,
                uint32_t submit_offset, uint32_t size)
 {
        struct kgsl_ringbuffer *target_ring = to_kgsl_ringbuffer(target);
+       assert(cmd_idx == 0);
        (*ring->cur++) = target_ring->bo->gpuaddr + submit_offset;
+       return size;
 }
 
 static void kgsl_ringbuffer_destroy(struct fd_ringbuffer *ring)
@@ -213,6 +215,7 @@ drm_private struct fd_ringbuffer * kgsl_ringbuffer_new(struct fd_pipe *pipe,
 
        ring = &kgsl_ring->base;
        ring->funcs = &funcs;
+       ring->size = size;
 
        kgsl_ring->bo = kgsl_rb_bo_new(to_kgsl_pipe(pipe), size);
        if (!kgsl_ring->bo) {
index 301ac66..86fc83e 100644 (file)
  * a backing bo, and a reloc table.
  */
 struct msm_cmd {
+       struct list_head list;
+
        struct fd_ringbuffer *ring;
        struct fd_bo *ring_bo;
 
        /* reloc's table: */
        struct drm_msm_gem_submit_reloc *relocs;
        uint32_t nr_relocs, max_relocs;
+
+       uint32_t size;
 };
 
 struct msm_ringbuffer {
@@ -75,10 +79,28 @@ struct msm_ringbuffer {
        struct msm_cmd **cmds;
        uint32_t nr_cmds, max_cmds;
 
-       /* current cmd-buffer: */
-       struct msm_cmd *cmd;
+       /* List of physical cmdstream buffers (msm_cmd) associated with this
+        * logical fd_ringbuffer.
+        *
+        * Note that this is different from msm_ringbuffer::cmds (which
+        * shadows msm_ringbuffer::submit::cmds for tracking submit ioctl
+        * related stuff, and is *only* tracked in the parent ringbuffer),
+        * and which only has "completed" cmd buffers (ie. we already know
+        * the size) added via get_cmd().
+        */
+       struct list_head cmd_list;
+
+       int is_growable;
+       unsigned cmd_count;
 };
 
+static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x)
+{
+       return (struct msm_ringbuffer *)x;
+}
+
+#define INIT_SIZE 0x1000
+
 static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
 drm_private extern pthread_mutex_t table_lock;
 
@@ -118,12 +140,15 @@ static void ring_cmd_del(struct msm_cmd *cmd)
 {
        if (cmd->ring_bo)
                ring_bo_del(cmd->ring->pipe->dev, cmd->ring_bo);
+       list_del(&cmd->list);
+       to_msm_ringbuffer(cmd->ring)->cmd_count--;
        free(cmd->relocs);
        free(cmd);
 }
 
 static struct msm_cmd * ring_cmd_new(struct fd_ringbuffer *ring, uint32_t size)
 {
+       struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
        struct msm_cmd *cmd = calloc(1, sizeof(*cmd));
 
        if (!cmd)
@@ -134,6 +159,9 @@ static struct msm_cmd * ring_cmd_new(struct fd_ringbuffer *ring, uint32_t size)
        if (!cmd->ring_bo)
                goto fail;
 
+       list_addtail(&cmd->list, &msm_ring->cmd_list);
+       msm_ring->cmd_count++;
+
        return cmd;
 
 fail:
@@ -158,9 +186,11 @@ static void *grow(void *ptr, uint32_t nr, uint32_t *max, uint32_t sz)
        (x)->nr_ ## name ++; \
 })
 
-static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x)
+static struct msm_cmd *current_cmd(struct fd_ringbuffer *ring)
 {
-       return (struct msm_ringbuffer *)x;
+       struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
+       assert(!LIST_IS_EMPTY(&msm_ring->cmd_list));
+       return LIST_LAST_ENTRY(&msm_ring->cmd_list, struct msm_cmd, list);
 }
 
 static uint32_t append_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
@@ -248,12 +278,13 @@ static void get_cmd(struct fd_ringbuffer *ring, struct msm_cmd *target_cmd,
        cmd->submit_offset = submit_offset;
        cmd->size = size;
        cmd->pad = 0;
+
+       target_cmd->size = size;
 }
 
 static void * msm_ringbuffer_hostptr(struct fd_ringbuffer *ring)
 {
-       struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
-       return fd_bo_map(msm_ring->cmd->ring_bo);
+       return fd_bo_map(current_cmd(ring)->ring_bo);
 }
 
 static uint32_t find_next_reloc_idx(struct msm_cmd *msm_cmd,
@@ -271,6 +302,15 @@ static uint32_t find_next_reloc_idx(struct msm_cmd *msm_cmd,
        return i;
 }
 
+static void delete_cmds(struct msm_ringbuffer *msm_ring)
+{
+       struct msm_cmd *cmd, *tmp;
+
+       LIST_FOR_EACH_ENTRY_SAFE(cmd, tmp, &msm_ring->cmd_list, list) {
+               ring_cmd_del(cmd);
+       }
+}
+
 static void flush_reset(struct fd_ringbuffer *ring)
 {
        struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
@@ -288,11 +328,36 @@ static void flush_reset(struct fd_ringbuffer *ring)
                target_cmd->nr_relocs = 0;
        }
 
-       msm_ring->cmd->nr_relocs = 0;
        msm_ring->submit.nr_cmds = 0;
        msm_ring->submit.nr_bos = 0;
        msm_ring->nr_cmds = 0;
        msm_ring->nr_bos = 0;
+
+       if (msm_ring->is_growable) {
+               delete_cmds(msm_ring);
+       } else {
+               /* in old mode, just reset the # of relocs: */
+               current_cmd(ring)->nr_relocs = 0;
+       }
+}
+
+static void finalize_current_cmd(struct fd_ringbuffer *ring, uint32_t *last_start)
+{
+       uint32_t submit_offset, size, type;
+       struct fd_ringbuffer *parent;
+
+       if (ring->parent) {
+               parent = ring->parent;
+               type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
+       } else {
+               parent = ring;
+               type = MSM_SUBMIT_CMD_BUF;
+       }
+
+       submit_offset = offset_bytes(last_start, ring->start);
+       size = offset_bytes(ring->cur, last_start);
+
+       get_cmd(parent, current_cmd(ring), submit_offset, size, type);
 }
 
 static void dump_submit(struct msm_ringbuffer *msm_ring)
@@ -323,13 +388,10 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start
        struct drm_msm_gem_submit req = {
                        .pipe = to_msm_pipe(ring->pipe)->pipe,
        };
-       uint32_t i, submit_offset, size;
+       uint32_t i;
        int ret;
 
-       submit_offset = offset_bytes(last_start, ring->start);
-       size = offset_bytes(ring->cur, last_start);
-
-       get_cmd(ring, msm_ring->cmd, submit_offset, size, MSM_SUBMIT_CMD_BUF);
+       finalize_current_cmd(ring, last_start);
 
        /* needs to be after get_cmd() as that could create bos/cmds table: */
        req.bos = VOID2U64(msm_ring->submit.bos),
@@ -367,6 +429,13 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start
        return ret;
 }
 
+static void msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size)
+{
+       assert(to_msm_ringbuffer(ring)->is_growable);
+       finalize_current_cmd(ring, ring->last_start);
+       ring_cmd_new(ring, size);
+}
+
 static void msm_ringbuffer_reset(struct fd_ringbuffer *ring)
 {
        flush_reset(ring);
@@ -375,14 +444,14 @@ static void msm_ringbuffer_reset(struct fd_ringbuffer *ring)
 static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
                const struct fd_reloc *r)
 {
-       struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
        struct fd_ringbuffer *parent = ring->parent ? ring->parent : ring;
        struct msm_bo *msm_bo = to_msm_bo(r->bo);
        struct drm_msm_gem_submit_reloc *reloc;
-       uint32_t idx = APPEND(msm_ring->cmd, relocs);
+       struct msm_cmd *cmd = current_cmd(ring);
+       uint32_t idx = APPEND(cmd, relocs);
        uint32_t addr;
 
-       reloc = &msm_ring->cmd->relocs[idx];
+       reloc = &cmd->relocs[idx];
 
        reloc->reloc_idx = bo2idx(parent, r->bo, r->flags);
        reloc->reloc_offset = r->offset;
@@ -398,26 +467,53 @@ static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
        (*ring->cur++) = addr | r->or;
 }
 
-static void msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
-               struct fd_ringbuffer *target,
+static uint32_t msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
+               struct fd_ringbuffer *target, uint32_t cmd_idx,
                uint32_t submit_offset, uint32_t size)
 {
-       struct msm_cmd *cmd = to_msm_ringbuffer(target)->cmd;
+       struct msm_cmd *cmd = NULL;
+       uint32_t idx = 0;
+
+       LIST_FOR_EACH_ENTRY(cmd, &to_msm_ringbuffer(target)->cmd_list, list) {
+               if (idx == cmd_idx)
+                       break;
+               idx++;
+       }
 
-       get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF);
+       assert(cmd && (idx == cmd_idx));
+
+       if (idx < (to_msm_ringbuffer(target)->cmd_count - 1)) {
+               /* All but the last cmd buffer is fully "baked" (ie. already has
+                * done get_cmd() to add it to the cmds table).  But in this case,
+                * the size we get is invalid (since it is calculated from the
+                * last cmd buffer):
+                */
+               size = cmd->size;
+       } else {
+               get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF);
+       }
 
        msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){
                .bo = cmd->ring_bo,
                .flags = FD_RELOC_READ,
                .offset = submit_offset,
        });
+
+       return size;
+}
+
+static uint32_t msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
+{
+       return to_msm_ringbuffer(ring)->cmd_count;
 }
 
 static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
 {
        struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
-       if (msm_ring->cmd)
-               ring_cmd_del(msm_ring->cmd);
+
+       flush_reset(ring);
+       delete_cmds(msm_ring);
+
        free(msm_ring->submit.cmds);
        free(msm_ring->submit.bos);
        free(msm_ring->bos);
@@ -428,9 +524,11 @@ static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
 static const struct fd_ringbuffer_funcs funcs = {
                .hostptr = msm_ringbuffer_hostptr,
                .flush = msm_ringbuffer_flush,
+               .grow = msm_ringbuffer_grow,
                .reset = msm_ringbuffer_reset,
                .emit_reloc = msm_ringbuffer_emit_reloc,
                .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring,
+               .cmd_count = msm_ringbuffer_cmd_count,
                .destroy = msm_ringbuffer_destroy,
 };
 
@@ -446,15 +544,20 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe,
                goto fail;
        }
 
+       if (size == 0) {
+               assert(pipe->dev->version >= FD_VERSION_UNLIMITED_CMDS);
+               size = INIT_SIZE;
+               msm_ring->is_growable = TRUE;
+       }
+
+       list_inithead(&msm_ring->cmd_list);
+
        ring = &msm_ring->base;
        ring->funcs = &funcs;
+       ring->size = size;
        ring->pipe = pipe;   /* needed in ring_cmd_new() */
 
-       msm_ring->cmd = ring_cmd_new(ring, size);
-       if (!msm_ring->cmd) {
-               ERROR_MSG("command buffer allocation failed");
-               goto fail;
-       }
+       ring_cmd_new(ring, size);
 
        return ring;
 fail: