From ea339137b0cef22385b9076921f7325e82776674 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 14 Mar 2022 17:14:59 -0700 Subject: [PATCH] freedreno/drm: Extract out "softpin" submit/ringbuffer base class We are going to want basically the identical thing, other than flush_submit_list, for virtio backend. Now that we've moved various other dependencies into the base classes, extract out an abstract base class for submit/ringbuffer. Signed-off-by: Rob Clark Part-of: --- src/freedreno/drm/freedreno_ringbuffer_sp.c | 651 ++++++++++++++++++ src/freedreno/drm/freedreno_ringbuffer_sp.h | 125 ++++ ...buffer_sp.h => freedreno_ringbuffer_sp_reloc.h} | 56 +- src/freedreno/drm/meson.build | 4 +- src/freedreno/drm/msm/msm_pipe.c | 9 +- src/freedreno/drm/msm/msm_priv.h | 6 - src/freedreno/drm/msm/msm_ringbuffer_sp.c | 731 +-------------------- 7 files changed, 839 insertions(+), 743 deletions(-) create mode 100644 src/freedreno/drm/freedreno_ringbuffer_sp.c create mode 100644 src/freedreno/drm/freedreno_ringbuffer_sp.h rename src/freedreno/drm/{msm/msm_ringbuffer_sp.h => freedreno_ringbuffer_sp_reloc.h} (62%) diff --git a/src/freedreno/drm/freedreno_ringbuffer_sp.c b/src/freedreno/drm/freedreno_ringbuffer_sp.c new file mode 100644 index 0000000..37d8eeb --- /dev/null +++ b/src/freedreno/drm/freedreno_ringbuffer_sp.c @@ -0,0 +1,651 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include +#include + +#include "util/hash_table.h" +#include "util/os_file.h" +#include "util/slab.h" + +#include "freedreno_ringbuffer_sp.h" + +/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead + * by avoiding the additional tracking necessary to build cmds/relocs tables + * (but still builds a bos table) + */ + +#define INIT_SIZE 0x1000 + +#define SUBALLOC_SIZE (32 * 1024) + +/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on, + * instead use a condition-variable. Note that pipe->flush() is not expected + * to be a common/hot path. 
+ */ +static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER; + +static void finalize_current_cmd(struct fd_ringbuffer *ring); +static struct fd_ringbuffer * +fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size, + enum fd_ringbuffer_flags flags); + +/* add (if needed) bo to submit and return index: */ +uint32_t +fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo) +{ + uint32_t idx; + + /* NOTE: it is legal to use the same bo on different threads for + * different submits. But it is not legal to use the same submit + * from different threads. + */ + idx = READ_ONCE(bo->idx); + + if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = (uint32_t)(uintptr_t)entry->data; + } else { + idx = APPEND(submit, bos, fd_bo_ref(bo)); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + bo->idx = idx; + } + + return idx; +} + +static void +fd_submit_suballoc_ring_bo(struct fd_submit *submit, + struct fd_ringbuffer_sp *fd_ring, uint32_t size) +{ + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; + + if (fd_submit->suballoc_ring) { + struct fd_ringbuffer_sp *suballoc_ring = + to_fd_ringbuffer_sp(fd_submit->suballoc_ring); + + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = + fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset; + + suballoc_offset = align(suballoc_offset, 0x10); + + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } + + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? + fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE); + fd_ring->offset = 0; + } else { + fd_ring->ring_bo = fd_bo_ref(suballoc_bo); + fd_ring->offset = suballoc_offset; + } + + struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring; + + fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base); + + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); +} + +static struct fd_ringbuffer * +fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + struct fd_ringbuffer_sp *fd_ring; + + fd_ring = slab_alloc(&fd_submit->ring_pool); + + fd_ring->u.submit = submit; + + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + fd_ring->base.refcnt = 1; + + if (flags & FD_RINGBUFFER_STREAMING) { + fd_submit_suballoc_ring_bo(submit, fd_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; + + fd_ring->offset = 0; + fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size); + } + + if (!fd_ringbuffer_sp_init(fd_ring, size, flags)) + return NULL; + + return &fd_ring->base; +} + +/** + * Prepare submit for flush, always done synchronously. 
+ * + * 1) Finalize primary ringbuffer, at this point no more cmdstream may + * be written into it, since from the PoV of the upper level driver + * the submit is flushed, even if deferred + * 2) Add cmdstream bos to bos table + * 3) Update bo fences + */ +static bool +fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd, + struct fd_submit_fence *out_fence) +{ + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + bool has_shared = false; + + finalize_current_cmd(submit->primary); + + struct fd_ringbuffer_sp *primary = + to_fd_ringbuffer_sp(submit->primary); + + for (unsigned i = 0; i < primary->u.nr_cmds; i++) + fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo); + + simple_mtx_lock(&table_lock); + for (unsigned i = 0; i < fd_submit->nr_bos; i++) { + fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence); + has_shared |= fd_submit->bos[i]->shared; + } + simple_mtx_unlock(&table_lock); + + fd_submit->out_fence = out_fence; + fd_submit->in_fence_fd = (in_fence_fd == -1) ? + -1 : os_dupfd_cloexec(in_fence_fd); + + return has_shared; +} + +static void +fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index) +{ + struct fd_submit *submit = job; + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + struct fd_pipe *pipe = submit->pipe; + + fd_submit->flush_submit_list(&fd_submit->submit_list); + + pthread_mutex_lock(&flush_mtx); + assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence)); + pipe->last_submit_fence = fd_submit->base.fence; + pthread_cond_broadcast(&flush_cnd); + pthread_mutex_unlock(&flush_mtx); + + DEBUG_MSG("finish: %u", submit->fence); +} + +static void +fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index) +{ + struct fd_submit *submit = job; + fd_submit_del(submit); +} + +static int +enqueue_submit_list(struct list_head *submit_list) +{ + struct fd_submit *submit = last_submit(submit_list); + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + + list_replace(submit_list, &fd_submit->submit_list); + list_inithead(submit_list); + + struct util_queue_fence *fence; + if (fd_submit->out_fence) { + fence = &fd_submit->out_fence->ready; + } else { + util_queue_fence_init(&fd_submit->fence); + fence = &fd_submit->fence; + } + + DEBUG_MSG("enqueue: %u", submit->fence); + + util_queue_add_job(&submit->pipe->dev->submit_queue, + submit, fence, + fd_submit_sp_flush_execute, + fd_submit_sp_flush_cleanup, + 0); + + return 0; +} + +static bool +should_defer(struct fd_submit *submit) +{ + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + + /* if too many bo's, it may not be worth the CPU cost of submit merging: */ + if (fd_submit->nr_bos > 30) + return false; + + /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k + * cmds before we exceed the size of the ringbuffer, which results in + * deadlock writing into the RB (ie. 
kernel doesn't finish writing into + * the RB so it doesn't kick the GPU to start consuming from the RB) + */ + if (submit->pipe->dev->deferred_cmds > 128) + return false; + + return true; +} + +static int +fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, + struct fd_submit_fence *out_fence) +{ + struct fd_device *dev = submit->pipe->dev; + struct fd_pipe *pipe = submit->pipe; + + /* Acquire lock before flush_prep() because it is possible to race between + * this and pipe->flush(): + */ + simple_mtx_lock(&dev->submit_lock); + + /* If there are deferred submits from another fd_pipe, flush them now, + * since we can't merge submits from different submitqueue's (ie. they + * could have different priority, etc) + */ + if (!list_is_empty(&dev->deferred_submits) && + (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) { + struct list_head submit_list; + + list_replace(&dev->deferred_submits, &submit_list); + list_inithead(&dev->deferred_submits); + dev->deferred_cmds = 0; + + enqueue_submit_list(&submit_list); + } + + list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits); + + bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence); + + assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence)); + pipe->last_enqueue_fence = submit->fence; + + /* If we don't need an out-fence, we can defer the submit. + * + * TODO we could defer submits with in-fence as well.. if we took our own + * reference to the fd, and merged all the in-fence-fd's when we flush the + * deferred submits + */ + if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) { + DEBUG_MSG("defer: %u", submit->fence); + dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary); + assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); + simple_mtx_unlock(&dev->submit_lock); + + return 0; + } + + struct list_head submit_list; + + list_replace(&dev->deferred_submits, &submit_list); + list_inithead(&dev->deferred_submits); + dev->deferred_cmds = 0; + + simple_mtx_unlock(&dev->submit_lock); + + return enqueue_submit_list(&submit_list); +} + +void +fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence) +{ + struct fd_device *dev = pipe->dev; + struct list_head submit_list; + + DEBUG_MSG("flush: %u", fence); + + list_inithead(&submit_list); + + simple_mtx_lock(&dev->submit_lock); + + assert(!fd_fence_after(fence, pipe->last_enqueue_fence)); + + foreach_submit_safe (deferred_submit, &dev->deferred_submits) { + /* We should never have submits from multiple pipes in the deferred + * list. If we did, we couldn't compare their fence to our fence, + * since each fd_pipe is an independent timeline. 
+ */ + if (deferred_submit->pipe != pipe) + break; + + if (fd_fence_after(deferred_submit->fence, fence)) + break; + + list_del(&deferred_submit->node); + list_addtail(&deferred_submit->node, &submit_list); + dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary); + } + + assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); + + simple_mtx_unlock(&dev->submit_lock); + + if (list_is_empty(&submit_list)) + goto flush_sync; + + enqueue_submit_list(&submit_list); + +flush_sync: + /* Once we are sure that we've enqueued at least up to the requested + * submit, we need to be sure that submitq has caught up and flushed + * them to the kernel + */ + pthread_mutex_lock(&flush_mtx); + while (fd_fence_before(pipe->last_submit_fence, fence)) { + pthread_cond_wait(&flush_cnd, &flush_mtx); + } + pthread_mutex_unlock(&flush_mtx); +} + +static void +fd_submit_sp_destroy(struct fd_submit *submit) +{ + struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); + + if (fd_submit->suballoc_ring) + fd_ringbuffer_del(fd_submit->suballoc_ring); + + _mesa_hash_table_destroy(fd_submit->bo_table, NULL); + + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy_child(&fd_submit->ring_pool); + + for (unsigned i = 0; i < fd_submit->nr_bos; i++) + fd_bo_del(fd_submit->bos[i]); + + free(fd_submit->bos); + free(fd_submit); +} + +static const struct fd_submit_funcs submit_funcs = { + .new_ringbuffer = fd_submit_sp_new_ringbuffer, + .flush = fd_submit_sp_flush, + .destroy = fd_submit_sp_destroy, +}; + +struct fd_submit * +fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list) +{ + struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit)); + struct fd_submit *submit; + + fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool); + + fd_submit->flush_submit_list = flush_submit_list; + + submit = &fd_submit->base; + submit->funcs = &submit_funcs; + + return submit; +} + +void +fd_pipe_sp_ringpool_init(struct fd_pipe *pipe) +{ + // TODO tune size: + slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16); +} + +void +fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe) +{ + if (pipe->ring_pool.num_elements) + slab_destroy_parent(&pipe->ring_pool); +} + +static void +finalize_current_cmd(struct fd_ringbuffer *ring) +{ + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); + APPEND(&fd_ring->u, cmds, + (struct fd_cmd_sp){ + .ring_bo = fd_bo_ref(fd_ring->ring_bo), + .size = offset_bytes(ring->cur, ring->start), + }); +} + +static void +fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) +{ + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); + struct fd_pipe *pipe = fd_ring->u.submit->pipe; + + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + + finalize_current_cmd(ring); + + fd_bo_del(fd_ring->ring_bo); + fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size); + + ring->start = fd_bo_map(fd_ring->ring_bo); + ring->end = &(ring->start[size / 4]); + ring->cur = ring->start; + ring->size = size; +} + +static inline bool +fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo) +{ + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); + + for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) { + if (fd_ring->u.reloc_bos[i] 
== bo) + return true; + } + return false; +} + +#define PTRSZ 64 +#include "freedreno_ringbuffer_sp_reloc.h" +#undef PTRSZ +#define PTRSZ 32 +#include "freedreno_ringbuffer_sp_reloc.h" +#undef PTRSZ + +static uint32_t +fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) +{ + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1; + return 1; +} + +static bool +fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring) +{ + assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); + struct fd_submit *submit = fd_ring->u.submit; + + if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) { + return false; + } + + return true; +} + +static void +fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) +{ + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); + + fd_bo_del(fd_ring->ring_bo); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) { + fd_bo_del(fd_ring->u.reloc_bos[i]); + } + free(fd_ring->u.reloc_bos); + + free(fd_ring); + } else { + struct fd_submit *submit = fd_ring->u.submit; + + for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) { + fd_bo_del(fd_ring->u.cmds[i].ring_bo); + } + free(fd_ring->u.cmds); + + slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring); + } +} + +static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = { + .grow = fd_ringbuffer_sp_grow, + .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32, + .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32, + .cmd_count = fd_ringbuffer_sp_cmd_count, + .check_size = fd_ringbuffer_sp_check_size, + .destroy = fd_ringbuffer_sp_destroy, +}; + +static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = { + .grow = fd_ringbuffer_sp_grow, + .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32, + .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32, + .cmd_count = fd_ringbuffer_sp_cmd_count, + .destroy = fd_ringbuffer_sp_destroy, +}; + +static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = { + .grow = fd_ringbuffer_sp_grow, + .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64, + .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64, + .cmd_count = fd_ringbuffer_sp_cmd_count, + .check_size = fd_ringbuffer_sp_check_size, + .destroy = fd_ringbuffer_sp_destroy, +}; + +static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = { + .grow = fd_ringbuffer_sp_grow, + .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64, + .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64, + .cmd_count = fd_ringbuffer_sp_cmd_count, + .destroy = fd_ringbuffer_sp_destroy, +}; + +static inline struct fd_ringbuffer * +fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct fd_ringbuffer *ring = &fd_ring->base; + + debug_assert(fd_ring->ring_bo); + + uint8_t *base = fd_bo_map(fd_ring->ring_bo); + ring->start = (void *)(base + fd_ring->offset); + ring->end = &(ring->start[size / 4]); + ring->cur = ring->start; + + ring->size = size; + ring->flags = flags; + + if (flags & _FD_RINGBUFFER_OBJECT) { + if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) { + ring->funcs = &ring_funcs_obj_64; + } else { + ring->funcs = &ring_funcs_obj_32; + } + } else { + if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) { + ring->funcs = &ring_funcs_nonobj_64; + } else { + ring->funcs = &ring_funcs_nonobj_32; + } + } + + // TODO initializing these could probably be conditional on flags + // since unneed for FD_RINGBUFFER_STAGING case.. 
+ fd_ring->u.cmds = NULL; + fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0; + + fd_ring->u.reloc_bos = NULL; + fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0; + + return ring; +} + +struct fd_ringbuffer * +fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) +{ + struct fd_device *dev = pipe->dev; + struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring)); + + /* Lock access to the fd_pipe->suballoc_* since ringbuffer object allocation + * can happen both on the frontend (most CSOs) and the driver thread (a6xx + * cached tex state, for example) + */ + simple_mtx_lock(&dev->suballoc_lock); + + /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */ + fd_ring->offset = align(dev->suballoc_offset, 64); + if (!dev->suballoc_bo || + fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) { + if (dev->suballoc_bo) + fd_bo_del(dev->suballoc_bo); + dev->suballoc_bo = + fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096))); + fd_ring->offset = 0; + } + + fd_ring->u.pipe = pipe; + fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo); + fd_ring->base.refcnt = 1; + + dev->suballoc_offset = fd_ring->offset + size; + + simple_mtx_unlock(&dev->suballoc_lock); + + return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT); +} diff --git a/src/freedreno/drm/freedreno_ringbuffer_sp.h b/src/freedreno/drm/freedreno_ringbuffer_sp.h new file mode 100644 index 0000000..b24eb01 --- /dev/null +++ b/src/freedreno/drm/freedreno_ringbuffer_sp.h @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_RINGBUFFER_SP_H_ +#define FREEDRENO_RINGBUFFER_SP_H_ + +#include +#include +#include + +#include "util/hash_table.h" +#include "util/os_file.h" +#include "util/slab.h" + +#include "freedreno_priv.h" +#include "freedreno_ringbuffer.h" + +/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead + * by avoiding the additional tracking necessary to build cmds/relocs tables + * (but still builds a bos table) + */ + +typedef int (*flush_submit_list_fn)(struct list_head *submit_list); + +struct fd_submit_sp { + struct fd_submit base; + + DECLARE_ARRAY(struct fd_bo *, bos); + + /* maps fd_bo to idx in bos table: */ + struct hash_table *bo_table; + + struct slab_child_pool ring_pool; + + /* Allow for sub-allocation of stateobj ring buffers (ie. sharing + * the same underlying bo).. 
+ * + * We also rely on previous stateobj having been fully constructed + * so we can reclaim extra space at it's end. + */ + struct fd_ringbuffer *suballoc_ring; + + /* Flush args, potentially attached to the last submit in the list + * of submits to merge: + */ + int in_fence_fd; + struct fd_submit_fence *out_fence; + + /* State for enqueued submits: + */ + struct list_head submit_list; /* includes this submit as last element */ + + /* Used in case out_fence==NULL: */ + struct util_queue_fence fence; + + flush_submit_list_fn flush_submit_list; +}; +FD_DEFINE_CAST(fd_submit, fd_submit_sp); + +/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers + * and sizes. Ie. a finalized buffer can have no more commands appended to + * it. + */ +struct fd_cmd_sp { + struct fd_bo *ring_bo; + unsigned size; +}; + +struct fd_ringbuffer_sp { + struct fd_ringbuffer base; + + /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ + unsigned offset; + + union { + /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from + * this one + */ + struct { + struct fd_pipe *pipe; + DECLARE_ARRAY(struct fd_bo *, reloc_bos); + }; + /* for other cases: */ + struct { + struct fd_submit *submit; + DECLARE_ARRAY(struct fd_cmd_sp, cmds); + }; + } u; + + struct fd_bo *ring_bo; +}; +FD_DEFINE_CAST(fd_ringbuffer, fd_ringbuffer_sp); + +void fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence); +uint32_t fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo); +struct fd_submit *fd_submit_sp_new(struct fd_pipe *pipe, + flush_submit_list_fn flush_submit_list); +void fd_pipe_sp_ringpool_init(struct fd_pipe *pipe); +void fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe); +struct fd_ringbuffer *fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); + +#endif /* FREEDRENO_RINGBUFFER_SP_H_ */ diff --git a/src/freedreno/drm/msm/msm_ringbuffer_sp.h b/src/freedreno/drm/freedreno_ringbuffer_sp_reloc.h similarity index 62% rename from src/freedreno/drm/msm/msm_ringbuffer_sp.h rename to src/freedreno/drm/freedreno_ringbuffer_sp_reloc.h index 8b8f61b..9374681 100644 --- a/src/freedreno/drm/msm/msm_ringbuffer_sp.h +++ b/src/freedreno/drm/freedreno_ringbuffer_sp_reloc.h @@ -40,28 +40,28 @@ static void X(emit_reloc_common)(struct fd_ringbuffer *ring, #endif } -static void X(msm_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring, +static void X(fd_ringbuffer_sp_emit_reloc_nonobj)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc) { X(emit_reloc_common)(ring, reloc); assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); - struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); + struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit); - msm_submit_append_bo(msm_submit, reloc->bo); + fd_submit_append_bo(fd_submit, reloc->bo); } -static void X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring, +static void X(fd_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc) { X(emit_reloc_common)(ring, reloc); assert(ring->flags & _FD_RINGBUFFER_OBJECT); - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); /* Avoid emitting duplicate BO references into the list. 
Ringbuffer * objects are long-lived, so this saves ongoing work at draw time in @@ -69,60 +69,60 @@ static void X(msm_ringbuffer_sp_emit_reloc_obj)(struct fd_ringbuffer *ring, * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't * hurt much. */ - if (!msm_ringbuffer_references_bo(ring, reloc->bo)) { - APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(reloc->bo)); + if (!fd_ringbuffer_references_bo(ring, reloc->bo)) { + APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(reloc->bo)); } } -static uint32_t X(msm_ringbuffer_sp_emit_reloc_ring)( +static uint32_t X(fd_ringbuffer_sp_emit_reloc_ring)( struct fd_ringbuffer *ring, struct fd_ringbuffer *target, uint32_t cmd_idx) { - struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); + struct fd_ringbuffer_sp *fd_target = to_fd_ringbuffer_sp(target); struct fd_bo *bo; uint32_t size; if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx].ring_bo; - size = msm_target->u.cmds[cmd_idx].size; + (cmd_idx < fd_target->u.nr_cmds)) { + bo = fd_target->u.cmds[cmd_idx].ring_bo; + size = fd_target->u.cmds[cmd_idx].size; } else { - bo = msm_target->ring_bo; + bo = fd_target->ring_bo; size = offset_bytes(target->cur, target->start); } if (ring->flags & _FD_RINGBUFFER_OBJECT) { - X(msm_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){ + X(fd_ringbuffer_sp_emit_reloc_obj)(ring, &(struct fd_reloc){ .bo = bo, - .iova = bo->iova + msm_target->offset, - .offset = msm_target->offset, + .iova = bo->iova + fd_target->offset, + .offset = fd_target->offset, }); } else { - X(msm_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){ + X(fd_ringbuffer_sp_emit_reloc_nonobj)(ring, &(struct fd_reloc){ .bo = bo, - .iova = bo->iova + msm_target->offset, - .offset = msm_target->offset, + .iova = bo->iova + fd_target->offset, + .offset = fd_target->offset, }); } if (!(target->flags & _FD_RINGBUFFER_OBJECT)) return size; - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - struct fd_bo *target_bo = msm_target->u.reloc_bos[i]; - if (!msm_ringbuffer_references_bo(ring, target_bo)) - APPEND(&msm_ring->u, reloc_bos, fd_bo_ref(target_bo)); + for (unsigned i = 0; i < fd_target->u.nr_reloc_bos; i++) { + struct fd_bo *target_bo = fd_target->u.reloc_bos[i]; + if (!fd_ringbuffer_references_bo(ring, target_bo)) + APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(target_bo)); } } else { // TODO it would be nice to know whether we have already // seen this target before. 
But hopefully we hit the // append_bo() fast path enough for this to not matter: - struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); + struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit); - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]); + for (unsigned i = 0; i < fd_target->u.nr_reloc_bos; i++) { + fd_submit_append_bo(fd_submit, fd_target->u.reloc_bos[i]); } } diff --git a/src/freedreno/drm/meson.build b/src/freedreno/drm/meson.build index 1284f5f..7712422 100644 --- a/src/freedreno/drm/meson.build +++ b/src/freedreno/drm/meson.build @@ -27,6 +27,9 @@ libfreedreno_drm_files = files( 'freedreno_priv.h', 'freedreno_ringbuffer.c', 'freedreno_ringbuffer.h', + 'freedreno_ringbuffer_sp.c', + 'freedreno_ringbuffer_sp.h', + 'freedreno_ringbuffer_sp_reloc.h', ) libfreedreno_drm_flags = [] libfreedreno_drm_includes = [ @@ -46,7 +49,6 @@ libfreedreno_drm_msm_files = files( 'msm/msm_priv.h', 'msm/msm_ringbuffer.c', 'msm/msm_ringbuffer_sp.c', - 'msm/msm_ringbuffer_sp.h', ) libfreedreno_drm_files += libfreedreno_drm_msm_files diff --git a/src/freedreno/drm/msm/msm_pipe.c b/src/freedreno/drm/msm/msm_pipe.c index babb89b..775fd59 100644 --- a/src/freedreno/drm/msm/msm_pipe.c +++ b/src/freedreno/drm/msm/msm_pipe.c @@ -26,6 +26,7 @@ #include "util/slab.h" +#include "freedreno_ringbuffer_sp.h" #include "msm_priv.h" static int @@ -199,14 +200,14 @@ msm_pipe_destroy(struct fd_pipe *pipe) struct msm_pipe *msm_pipe = to_msm_pipe(pipe); close_submitqueue(pipe, msm_pipe->queue_id); - msm_pipe_sp_ringpool_fini(pipe); + fd_pipe_sp_ringpool_fini(pipe); free(msm_pipe); } static const struct fd_pipe_funcs sp_funcs = { - .ringbuffer_new_object = msm_ringbuffer_sp_new_object, + .ringbuffer_new_object = fd_ringbuffer_sp_new_object, .submit_new = msm_submit_sp_new, - .flush = msm_pipe_sp_flush, + .flush = fd_pipe_sp_flush, .get_param = msm_pipe_get_param, .set_param = msm_pipe_set_param, .wait = msm_pipe_wait, @@ -281,7 +282,7 @@ msm_pipe_new(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) if (open_submitqueue(pipe, prio)) goto fail; - msm_pipe_sp_ringpool_init(pipe); + fd_pipe_sp_ringpool_init(pipe); return pipe; fail: diff --git a/src/freedreno/drm/msm/msm_priv.h b/src/freedreno/drm/msm/msm_priv.h index ca2bca8..b41d1ff 100644 --- a/src/freedreno/drm/msm/msm_priv.h +++ b/src/freedreno/drm/msm/msm_priv.h @@ -68,15 +68,9 @@ struct fd_pipe *msm_pipe_new(struct fd_device *dev, enum fd_pipe_id id, struct fd_ringbuffer *msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); -struct fd_ringbuffer *msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, - uint32_t size); struct fd_submit *msm_submit_new(struct fd_pipe *pipe); struct fd_submit *msm_submit_sp_new(struct fd_pipe *pipe); -void msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence); - -void msm_pipe_sp_ringpool_init(struct fd_pipe *pipe); -void msm_pipe_sp_ringpool_fini(struct fd_pipe *pipe); struct msm_bo { struct fd_bo base; diff --git a/src/freedreno/drm/msm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm/msm_ringbuffer_sp.c index a607f0e..5fd0dfe 100644 --- a/src/freedreno/drm/msm/msm_ringbuffer_sp.c +++ b/src/freedreno/drm/msm/msm_ringbuffer_sp.c @@ -28,249 +28,16 @@ #include #include -#include "util/hash_table.h" #include "util/os_file.h" -#include "util/slab.h" -#include "drm/freedreno_ringbuffer.h" +#include "drm/freedreno_ringbuffer_sp.h" #include "msm_priv.h" -/* A "softpin" implementation of submit/ringbuffer, which lowers CPU 
overhead - * by avoiding the additional tracking necessary to build cmds/relocs tables - * (but still builds a bos table) - */ - -#define INIT_SIZE 0x1000 - -#define SUBALLOC_SIZE (32 * 1024) - -/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on, - * instead use a condition-variable. Note that pipe->flush() is not expected - * to be a common/hot path. - */ -static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit_sp { - struct fd_submit base; - - DECLARE_ARRAY(struct fd_bo *, bos); - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_child_pool ring_pool; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. - */ - struct fd_ringbuffer *suballoc_ring; - - /* Flush args, potentially attached to the last submit in the list - * of submits to merge: - */ - int in_fence_fd; - struct fd_submit_fence *out_fence; - - /* State for enqueued submits: - */ - struct list_head submit_list; /* includes this submit as last element */ - - /* Used in case out_fence==NULL: */ - struct util_queue_fence fence; -}; -FD_DEFINE_CAST(fd_submit, msm_submit_sp); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. a finalized buffer can have no more commands appended to - * it. - */ -struct msm_cmd_sp { - struct fd_bo *ring_bo; - unsigned size; -}; - -struct msm_ringbuffer_sp { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - - union { - /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from - * this one - */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct fd_bo *, reloc_bos); - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd_sp, cmds); - }; - } u; - - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * -msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo) -{ - uint32_t idx; - - /* NOTE: it is legal to use the same bo on different threads for - * different submits. But it is not legal to use the same submit - * from different threads. 
- */ - idx = READ_ONCE(bo->idx); - - if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - idx = APPEND(submit, bos, fd_bo_ref(bo)); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - bo->idx = idx; - } - - return idx; -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer_sp *msm_ring, uint32_t size) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer_sp *suballoc_ring = - to_msm_ringbuffer_sp(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = - fd_ringbuffer_size(msm_submit->suballoc_ring) + suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_ringbuffer_sp *msm_ring; - - msm_ring = slab_alloc(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size); - } - - if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) - return NULL; - - return &msm_ring->base; -} - -/** - * Prepare submit for flush, always done synchronously. 
- * - * 1) Finalize primary ringbuffer, at this point no more cmdstream may - * be written into it, since from the PoV of the upper level driver - * the submit is flushed, even if deferred - * 2) Add cmdstream bos to bos table - * 3) Update bo fences - */ -static bool -msm_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd, - struct fd_submit_fence *out_fence) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - bool has_shared = false; - - finalize_current_cmd(submit->primary); - - struct msm_ringbuffer_sp *primary = - to_msm_ringbuffer_sp(submit->primary); - - for (unsigned i = 0; i < primary->u.nr_cmds; i++) - msm_submit_append_bo(msm_submit, primary->u.cmds[i].ring_bo); - - simple_mtx_lock(&table_lock); - for (unsigned i = 0; i < msm_submit->nr_bos; i++) { - fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence); - has_shared |= msm_submit->bos[i]->shared; - } - simple_mtx_unlock(&table_lock); - - msm_submit->out_fence = out_fence; - msm_submit->in_fence_fd = (in_fence_fd == -1) ? - -1 : os_dupfd_cloexec(in_fence_fd); - - return has_shared; -} - static int flush_submit_list(struct list_head *submit_list) { - struct msm_submit_sp *msm_submit = to_msm_submit_sp(last_submit(submit_list)); - struct msm_pipe *msm_pipe = to_msm_pipe(msm_submit->base.pipe); + struct fd_submit_sp *fd_submit = to_fd_submit_sp(last_submit(submit_list)); + struct msm_pipe *msm_pipe = to_msm_pipe(fd_submit->base.pipe); struct drm_msm_gem_submit req = { .flags = msm_pipe->pipe, .queueid = msm_pipe->queue_id, @@ -284,7 +51,7 @@ flush_submit_list(struct list_head *submit_list) */ foreach_submit (submit, submit_list) { assert(submit->pipe == &msm_pipe->base); - nr_cmds += to_msm_ringbuffer_sp(submit->primary)->u.nr_cmds; + nr_cmds += to_fd_ringbuffer_sp(submit->primary)->u.nr_cmds; } struct drm_msm_gem_submit_cmd cmds[nr_cmds]; @@ -295,13 +62,13 @@ flush_submit_list(struct list_head *submit_list) * list, merge their bo tables into the last submit. 
*/ foreach_submit_safe (submit, submit_list) { - struct msm_ringbuffer_sp *deferred_primary = - to_msm_ringbuffer_sp(submit->primary); + struct fd_ringbuffer_sp *deferred_primary = + to_fd_ringbuffer_sp(submit->primary); for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) { cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF; cmds[cmd_idx].submit_idx = - msm_submit_append_bo(msm_submit, deferred_primary->u.cmds[i].ring_bo); + fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo); cmds[cmd_idx].submit_offset = deferred_primary->offset; cmds[cmd_idx].size = deferred_primary->u.cmds[i].size; cmds[cmd_idx].pad = 0; @@ -318,13 +85,13 @@ flush_submit_list(struct list_head *submit_list) break; } - struct msm_submit_sp *msm_deferred_submit = to_msm_submit_sp(submit); - for (unsigned i = 0; i < msm_deferred_submit->nr_bos; i++) { + struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit); + for (unsigned i = 0; i < fd_deferred_submit->nr_bos; i++) { /* Note: if bo is used in both the current submit and the deferred * submit being merged, we expect to hit the fast-path as we add it * to the current submit: */ - msm_submit_append_bo(msm_submit, msm_deferred_submit->bos[i]); + fd_submit_append_bo(fd_submit, fd_deferred_submit->bos[i]); } /* Now that the cmds/bos have been transfered over to the current submit, @@ -334,9 +101,9 @@ flush_submit_list(struct list_head *submit_list) fd_submit_del(submit); } - if (msm_submit->in_fence_fd != -1) { + if (fd_submit->in_fence_fd != -1) { req.flags |= MSM_SUBMIT_FENCE_FD_IN; - req.fence_fd = msm_submit->in_fence_fd; + req.fence_fd = fd_submit->in_fence_fd; msm_pipe->no_implicit_sync = true; } @@ -344,7 +111,7 @@ flush_submit_list(struct list_head *submit_list) req.flags |= MSM_SUBMIT_NO_IMPLICIT; } - if (msm_submit->out_fence && msm_submit->out_fence->use_fence_fd) { + if (fd_submit->out_fence && fd_submit->out_fence->use_fence_fd) { req.flags |= MSM_SUBMIT_FENCE_FD_OUT; } @@ -354,24 +121,24 @@ flush_submit_list(struct list_head *submit_list) * bound to limit on-stack allocation to 4k: */ const unsigned bo_limit = sizeof(struct drm_msm_gem_submit_bo) / 4096; - bool bos_on_stack = msm_submit->nr_bos < bo_limit; + bool bos_on_stack = fd_submit->nr_bos < bo_limit; struct drm_msm_gem_submit_bo - _submit_bos[bos_on_stack ? msm_submit->nr_bos : 0]; + _submit_bos[bos_on_stack ? 
fd_submit->nr_bos : 0]; struct drm_msm_gem_submit_bo *submit_bos; if (bos_on_stack) { submit_bos = _submit_bos; } else { - submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0])); + submit_bos = malloc(fd_submit->nr_bos * sizeof(submit_bos[0])); } - for (unsigned i = 0; i < msm_submit->nr_bos; i++) { - submit_bos[i].flags = msm_submit->bos[i]->reloc_flags; - submit_bos[i].handle = msm_submit->bos[i]->handle; + for (unsigned i = 0; i < fd_submit->nr_bos; i++) { + submit_bos[i].flags = fd_submit->bos[i]->reloc_flags; + submit_bos[i].handle = fd_submit->bos[i]->handle; submit_bos[i].presumed = 0; } req.bos = VOID2U64(submit_bos); - req.nr_bos = msm_submit->nr_bos; + req.nr_bos = fd_submit->nr_bos; req.cmds = VOID2U64(cmds); req.nr_cmds = nr_cmds; @@ -382,472 +149,28 @@ flush_submit_list(struct list_head *submit_list) if (ret) { ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); msm_dump_submit(&req); - } else if (!ret && msm_submit->out_fence) { - msm_submit->out_fence->fence.kfence = req.fence; - msm_submit->out_fence->fence.ufence = msm_submit->base.fence; - msm_submit->out_fence->fence_fd = req.fence_fd; + } else if (!ret && fd_submit->out_fence) { + fd_submit->out_fence->fence.kfence = req.fence; + fd_submit->out_fence->fence.ufence = fd_submit->base.fence; + fd_submit->out_fence->fence_fd = req.fence_fd; } if (!bos_on_stack) free(submit_bos); - pthread_mutex_lock(&flush_mtx); - assert(fd_fence_before(msm_pipe->base.last_submit_fence, msm_submit->base.fence)); - msm_pipe->base.last_submit_fence = msm_submit->base.fence; - pthread_cond_broadcast(&flush_cnd); - pthread_mutex_unlock(&flush_mtx); - - if (msm_submit->in_fence_fd != -1) - close(msm_submit->in_fence_fd); + if (fd_submit->in_fence_fd != -1) + close(fd_submit->in_fence_fd); return ret; } -static void -msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index) -{ - struct fd_submit *submit = job; - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - flush_submit_list(&msm_submit->submit_list); - - DEBUG_MSG("finish: %u", submit->fence); -} - -static void -msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index) -{ - struct fd_submit *submit = job; - fd_submit_del(submit); -} - -static int -enqueue_submit_list(struct list_head *submit_list) -{ - struct fd_submit *submit = last_submit(submit_list); - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - list_replace(submit_list, &msm_submit->submit_list); - list_inithead(submit_list); - - struct util_queue_fence *fence; - if (msm_submit->out_fence) { - fence = &msm_submit->out_fence->ready; - } else { - util_queue_fence_init(&msm_submit->fence); - fence = &msm_submit->fence; - } - - DEBUG_MSG("enqueue: %u", submit->fence); - - util_queue_add_job(&submit->pipe->dev->submit_queue, - submit, fence, - msm_submit_sp_flush_execute, - msm_submit_sp_flush_cleanup, - 0); - - return 0; -} - -static bool -should_defer(struct fd_submit *submit) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - /* if too many bo's, it may not be worth the CPU cost of submit merging: */ - if (msm_submit->nr_bos > 30) - return false; - - /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k - * cmds before we exceed the size of the ringbuffer, which results in - * deadlock writing into the RB (ie. 
kernel doesn't finish writing into - * the RB so it doesn't kick the GPU to start consuming from the RB) - */ - if (submit->pipe->dev->deferred_cmds > 128) - return false; - - return true; -} - -static int -msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, - struct fd_submit_fence *out_fence) -{ - struct fd_device *dev = submit->pipe->dev; - struct fd_pipe *pipe = submit->pipe; - - /* Acquire lock before flush_prep() because it is possible to race between - * this and pipe->flush(): - */ - simple_mtx_lock(&dev->submit_lock); - - /* If there are deferred submits from another fd_pipe, flush them now, - * since we can't merge submits from different submitqueue's (ie. they - * could have different priority, etc) - */ - if (!list_is_empty(&dev->deferred_submits) && - (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) { - struct list_head submit_list; - - list_replace(&dev->deferred_submits, &submit_list); - list_inithead(&dev->deferred_submits); - dev->deferred_cmds = 0; - - enqueue_submit_list(&submit_list); - } - - list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits); - - bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence); - - assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence)); - pipe->last_enqueue_fence = submit->fence; - - /* If we don't need an out-fence, we can defer the submit. - * - * TODO we could defer submits with in-fence as well.. if we took our own - * reference to the fd, and merged all the in-fence-fd's when we flush the - * deferred submits - */ - if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) { - DEBUG_MSG("defer: %u", submit->fence); - dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary); - assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); - simple_mtx_unlock(&dev->submit_lock); - - return 0; - } - - struct list_head submit_list; - - list_replace(&dev->deferred_submits, &submit_list); - list_inithead(&dev->deferred_submits); - dev->deferred_cmds = 0; - - simple_mtx_unlock(&dev->submit_lock); - - return enqueue_submit_list(&submit_list); -} - -void -msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence) -{ - struct fd_device *dev = pipe->dev; - struct list_head submit_list; - - DEBUG_MSG("flush: %u", fence); - - list_inithead(&submit_list); - - simple_mtx_lock(&dev->submit_lock); - - assert(!fd_fence_after(fence, pipe->last_enqueue_fence)); - - foreach_submit_safe (deferred_submit, &dev->deferred_submits) { - /* We should never have submits from multiple pipes in the deferred - * list. If we did, we couldn't compare their fence to our fence, - * since each fd_pipe is an independent timeline. 
- */ - if (deferred_submit->pipe != pipe) - break; - - if (fd_fence_after(deferred_submit->fence, fence)) - break; - - list_del(&deferred_submit->node); - list_addtail(&deferred_submit->node, &submit_list); - dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary); - } - - assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); - - simple_mtx_unlock(&dev->submit_lock); - - if (list_is_empty(&submit_list)) - goto flush_sync; - - enqueue_submit_list(&submit_list); - -flush_sync: - /* Once we are sure that we've enqueued at least up to the requested - * submit, we need to be sure that submitq has caught up and flushed - * them to the kernel - */ - pthread_mutex_lock(&flush_mtx); - while (fd_fence_before(pipe->last_submit_fence, fence)) { - pthread_cond_wait(&flush_cnd, &flush_mtx); - } - pthread_mutex_unlock(&flush_mtx); -} - -static void -msm_submit_sp_destroy(struct fd_submit *submit) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy_child(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_sp_new_ringbuffer, - .flush = msm_submit_sp_flush, - .destroy = msm_submit_sp_destroy, -}; - struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe) { - struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - - msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - slab_create_child(&msm_submit->ring_pool, &pipe->ring_pool); - - submit = &msm_submit->base; - submit->funcs = &submit_funcs; - - return submit; -} - -void -msm_pipe_sp_ringpool_init(struct fd_pipe *pipe) -{ - // TODO tune size: - slab_create_parent(&pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); -} - -void -msm_pipe_sp_ringpool_fini(struct fd_pipe *pipe) -{ - if (pipe->ring_pool.num_elements) - slab_destroy_parent(&pipe->ring_pool); -} - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - APPEND(&msm_ring->u, cmds, - (struct msm_cmd_sp){ - .ring_bo = fd_bo_ref(msm_ring->ring_bo), - .size = offset_bytes(ring->cur, ring->start), - }); -} - -static void -msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size / 4]); - ring->cur = ring->start; - ring->size = size; -} - -static inline bool -msm_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - if (msm_ring->u.reloc_bos[i] == bo) - return true; - } - return false; -} - 
-#define PTRSZ 64 -#include "msm_ringbuffer_sp.h" -#undef PTRSZ -#define PTRSZ 32 -#include "msm_ringbuffer_sp.h" -#undef PTRSZ - -static uint32_t -msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; - return 1; -} - -static bool -msm_ringbuffer_sp_check_size(struct fd_ringbuffer *ring) -{ - assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_submit *submit = msm_ring->u.submit; - - if (to_msm_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) { - return false; - } - - return true; -} - -static void -msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - fd_bo_del(msm_ring->ring_bo); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i]); - } - free(msm_ring->u.reloc_bos); - - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - fd_bo_del(msm_ring->u.cmds[i].ring_bo); - } - free(msm_ring->u.cmds); - - slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring); - } -} - -static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .check_size = msm_ringbuffer_sp_check_size, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .check_size = msm_ringbuffer_sp_check_size, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - /* We don't do any translation from internal FD_RELOC flags to MSM flags. 
*/ STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ); STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE); STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP); - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size / 4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - if (flags & _FD_RINGBUFFER_OBJECT) { - if (fd_dev_64b(&msm_ring->u.pipe->dev_id)) { - ring->funcs = &ring_funcs_obj_64; - } else { - ring->funcs = &ring_funcs_obj_32; - } - } else { - if (fd_dev_64b(&msm_ring->u.submit->pipe->dev_id)) { - ring->funcs = &ring_funcs_nonobj_64; - } else { - ring->funcs = &ring_funcs_nonobj_32; - } - } - - // TODO initializing these could probably be conditional on flags - // since unneed for FD_RINGBUFFER_STAGING case.. - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct fd_device *dev = pipe->dev; - struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); - - /* Lock access to the msm_pipe->suballoc_* since ringbuffer object allocation - * can happen both on the frontend (most CSOs) and the driver thread (a6xx - * cached tex state, for example) - */ - simple_mtx_lock(&dev->suballoc_lock); - - /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */ - msm_ring->offset = align(dev->suballoc_offset, 64); - if (!dev->suballoc_bo || - msm_ring->offset + size > fd_bo_size(dev->suballoc_bo)) { - if (dev->suballoc_bo) - fd_bo_del(dev->suballoc_bo); - dev->suballoc_bo = - fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096))); - msm_ring->offset = 0; - } - - msm_ring->u.pipe = pipe; - msm_ring->ring_bo = fd_bo_ref(dev->suballoc_bo); - msm_ring->base.refcnt = 1; - - dev->suballoc_offset = msm_ring->offset + size; - - simple_mtx_unlock(&dev->suballoc_lock); - - return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); + return fd_submit_sp_new(pipe, flush_submit_list); } -- 2.7.4
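
For context on how the extracted base class is intended to be reused: a backend supplies only its flush_submit_list callback and wires the shared fd_submit_sp_*/fd_pipe_sp_*/fd_ringbuffer_sp_* entry points into its fd_pipe_funcs, exactly as the msm hunks above now do. The sketch below only illustrates that pattern; the example_* names are hypothetical placeholders (the actual virtio wiring is not part of this patch), and backend-specific details are elided.

   /* Hypothetical backend glue, mirroring msm_pipe.c / msm_ringbuffer_sp.c
    * after this change.  Only flush_submit_list is backend-specific; bo/cmd
    * tracking, submit merging and deferred flush handling all come from the
    * extracted freedreno_ringbuffer_sp base class.
    */
   #include "freedreno_ringbuffer_sp.h"

   static int
   example_flush_submit_list(struct list_head *submit_list)
   {
      /* Backend-specific: take the merged list of deferred submits and hand
       * it to the kernel (for msm this is the GEM submit ioctl; a virtio
       * backend would marshal the equivalent to the host instead).
       */
      return 0;
   }

   static struct fd_submit *
   example_submit_sp_new(struct fd_pipe *pipe)
   {
      /* The base class constructs the submit; the backend only provides the
       * final flush step, as msm_submit_sp_new() does above.
       */
      return fd_submit_sp_new(pipe, example_flush_submit_list);
   }

   static const struct fd_pipe_funcs example_sp_funcs = {
      .ringbuffer_new_object = fd_ringbuffer_sp_new_object,
      .submit_new = example_submit_sp_new,
      .flush = fd_pipe_sp_flush,
      /* plus the backend's own get_param/set_param/wait/destroy hooks */
   };

A backend following this pattern would also call fd_pipe_sp_ringpool_init()/fd_pipe_sp_ringpool_fini() from its pipe constructor and destructor, as msm_pipe_new() and msm_pipe_destroy() do after this change.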