From 419a154dbef839b920689bea72aa9af41b2b114f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Jun 2016 14:06:24 -0400 Subject: [PATCH] freedreno: support growable cmdstream buffers The issue that userspace needed to solve is that there is ~two orders of magnitude size difference in cmdstream buffers (both for gmem commands and for draw commands), and that the previous practice of allocating worst-case sizes is quite wasteful. Previously a submit would be constructed (for example) like: CMD TARGET DESCRIPTION g0 N gmem/tiling commands b0 Y binning commands d0 Y draw commands Which, after the one non-IB-target cmd buffer is inserted into the kernel controlled ringbuffer, looks like (not to scale): b0: d0: +-----+ +-----+ IB1 | ... | | ... | +-----+ +-----+ ^ ^ | | +-----+ +-+---------+ g0: | | | +----+----+----+----+----+----+---- IB0 | .. | IB | .. | IB | .. | IB | ... +----+----+----+----+----+----+---- ^ tile0 tile1 | +-----------+ userspace | ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel | ----+----+---- ringbuffer ... | IB | ... ----+----+---- Now, multiple physical cmdstream buffers per fd_ringbuffer are supported, so this becomes: CMD TARGET DESCRIPTION g0 N ... N gmem/tiling commands gN N b0 Y ... Y binning commands bN Y d0 Y ... Y draw commands dN Y Which, after the non-IB-target cmd buffers (g0..gN) are inserted into the kernel controlled ringbuffer, looks like: b0: b1 d0: d1 +-----+ +-----+ +-----+ +-----+ IB1 | ... | | ... | ... | ... | | ... | ... +-----+ +-----+ +-----+ +-----+ ^ ^ ^ ^ | | | | | +-+ | +-----+------+ +-----+ | | | | | | +--+----------+ | g0: | | | | | | +----+----+----+----+----+----+---+----+----+---- IB0 | .. | IB | IB | .. | IB | IB |.. | IB | IB |... +----+----+----+----+----+----+---+----+----+---- ^ tile0 tile1 | to b0 to b1 | | | to|d0 to|d1 | | +----+ | +-+-----------+ | | | | | | | +------+ | +-+-------------+ | | g1: | | | | | | | +----+----+----+----+----+----+---+----+----+---- IB0 | | .. 
| IB | IB | .. | IB | IB |.. | IB | IB |... | +----+----+----+----+----+----+---+----+----+---- | ^ tileX tileY | | | +-----------+ +-----------+ | userspace | | ~~~~~~~~~~~~~~~~~~~|~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel | | ----+----+----+---- ringbuffer ... | IB | IB | ... ----+----+----+---- Signed-off-by: Rob Clark --- freedreno/freedreno_priv.h | 6 +- freedreno/freedreno_ringbuffer.c | 32 ++++++-- freedreno/freedreno_ringbuffer.h | 2 + freedreno/kgsl/kgsl_ringbuffer.c | 7 +- freedreno/msm/msm_ringbuffer.c | 155 ++++++++++++++++++++++++++++++++------- 5 files changed, 166 insertions(+), 36 deletions(-) diff --git a/freedreno/freedreno_priv.h b/freedreno/freedreno_priv.h index 9737b32..cdfdbe8 100644 --- a/freedreno/freedreno_priv.h +++ b/freedreno/freedreno_priv.h @@ -134,12 +134,14 @@ struct fd_ringmarker { struct fd_ringbuffer_funcs { void * (*hostptr)(struct fd_ringbuffer *ring); int (*flush)(struct fd_ringbuffer *ring, uint32_t *last_start); + void (*grow)(struct fd_ringbuffer *ring, uint32_t size); void (*reset)(struct fd_ringbuffer *ring); void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); - void (*emit_reloc_ring)(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, + uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx, uint32_t submit_offset, uint32_t size); + uint32_t (*cmd_count)(struct fd_ringbuffer *ring); void (*destroy)(struct fd_ringbuffer *ring); }; diff --git a/freedreno/freedreno_ringbuffer.c b/freedreno/freedreno_ringbuffer.c index 34a06d8..22dafb3 100644 --- a/freedreno/freedreno_ringbuffer.c +++ b/freedreno/freedreno_ringbuffer.c @@ -45,10 +45,9 @@ fd_ringbuffer_new(struct fd_pipe *pipe, uint32_t size) if (!ring) return NULL; - ring->size = size; ring->pipe = pipe; ring->start = ring->funcs->hostptr(ring); - ring->end = &(ring->start[size/4]); + ring->end = &(ring->start[ring->size/4]); ring->cur = ring->last_start = ring->start; @@ -87,6 +86,22 @@ 
int fd_ringbuffer_flush(struct fd_ringbuffer *ring) return ring->funcs->flush(ring, ring->last_start); } +void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords) +{ + assert(ring->funcs->grow); /* unsupported on kgsl */ + + /* there is an upper bound on IB size, which appears to be 0x100000 */ + if (ring->size < 0x100000) + ring->size *= 2; + + ring->funcs->grow(ring, ring->size); + + ring->start = ring->funcs->hostptr(ring); + ring->end = &(ring->start[ring->size/4]); + + ring->cur = ring->last_start = ring->start; +} + uint32_t fd_ringbuffer_timestamp(struct fd_ringbuffer *ring) { return ring->last_timestamp; @@ -108,7 +123,14 @@ void fd_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, submit_offset = offset_bytes(target->cur, target->ring->start); size = offset_bytes(end->cur, target->cur); - ring->funcs->emit_reloc_ring(ring, target->ring, submit_offset, size); + ring->funcs->emit_reloc_ring(ring, target->ring, 0, submit_offset, size); +} + +uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring) +{ + if (!ring->funcs->cmd_count) + return 1; + return ring->funcs->cmd_count(ring); } uint32_t @@ -116,9 +138,7 @@ fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, struct fd_ringbuffer *target, uint32_t cmd_idx) { uint32_t size = offset_bytes(target->cur, target->start); - assert(cmd_idx == 0); - ring->funcs->emit_reloc_ring(ring, target, 0, size); - return size; + return ring->funcs->emit_reloc_ring(ring, target, cmd_idx, 0, size); } struct fd_ringmarker * fd_ringmarker_new(struct fd_ringbuffer *ring) diff --git a/freedreno/freedreno_ringbuffer.h b/freedreno/freedreno_ringbuffer.h index 643f50b..8899b5d 100644 --- a/freedreno/freedreno_ringbuffer.h +++ b/freedreno/freedreno_ringbuffer.h @@ -56,6 +56,7 @@ void fd_ringbuffer_set_parent(struct fd_ringbuffer *ring, struct fd_ringbuffer *parent); void fd_ringbuffer_reset(struct fd_ringbuffer *ring); int fd_ringbuffer_flush(struct fd_ringbuffer *ring); +void fd_ringbuffer_grow(struct 
fd_ringbuffer *ring, uint32_t ndwords); uint32_t fd_ringbuffer_timestamp(struct fd_ringbuffer *ring); static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring, @@ -77,6 +78,7 @@ struct fd_reloc { void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); will_be_deprecated void fd_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, struct fd_ringmarker *target, struct fd_ringmarker *end); +uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring); uint32_t fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, struct fd_ringbuffer *target, uint32_t cmd_idx); diff --git a/freedreno/kgsl/kgsl_ringbuffer.c b/freedreno/kgsl/kgsl_ringbuffer.c index a0bc9d0..7b3298a 100644 --- a/freedreno/kgsl/kgsl_ringbuffer.c +++ b/freedreno/kgsl/kgsl_ringbuffer.c @@ -173,12 +173,14 @@ static void kgsl_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, kgsl_pipe_add_submit(to_kgsl_pipe(ring->pipe), kgsl_bo); } -static void kgsl_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, +static uint32_t kgsl_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx, uint32_t submit_offset, uint32_t size) { struct kgsl_ringbuffer *target_ring = to_kgsl_ringbuffer(target); + assert(cmd_idx == 0); (*ring->cur++) = target_ring->bo->gpuaddr + submit_offset; + return size; } static void kgsl_ringbuffer_destroy(struct fd_ringbuffer *ring) @@ -213,6 +215,7 @@ drm_private struct fd_ringbuffer * kgsl_ringbuffer_new(struct fd_pipe *pipe, ring = &kgsl_ring->base; ring->funcs = &funcs; + ring->size = size; kgsl_ring->bo = kgsl_rb_bo_new(to_kgsl_pipe(pipe), size); if (!kgsl_ring->bo) { diff --git a/freedreno/msm/msm_ringbuffer.c b/freedreno/msm/msm_ringbuffer.c index 301ac66..86fc83e 100644 --- a/freedreno/msm/msm_ringbuffer.c +++ b/freedreno/msm/msm_ringbuffer.c @@ -40,12 +40,16 @@ * a backing bo, and a reloc table. 
*/ struct msm_cmd { + struct list_head list; + struct fd_ringbuffer *ring; struct fd_bo *ring_bo; /* reloc's table: */ struct drm_msm_gem_submit_reloc *relocs; uint32_t nr_relocs, max_relocs; + + uint32_t size; }; struct msm_ringbuffer { @@ -75,10 +79,28 @@ struct msm_ringbuffer { struct msm_cmd **cmds; uint32_t nr_cmds, max_cmds; - /* current cmd-buffer: */ - struct msm_cmd *cmd; + /* List of physical cmdstream buffers (msm_cmd) associated with this + * logical fd_ringbuffer. + * + * Note that this is different from msm_ringbuffer::cmds, which + * shadows msm_ringbuffer::submit::cmds for tracking submit ioctl + * related stuff, is tracked *only* in the parent ringbuffer, and + * only has "completed" cmd buffers (i.e. we already know the + * size) added via get_cmd(). + */ + struct list_head cmd_list; + + int is_growable; + unsigned cmd_count; }; +static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x) +{ + return (struct msm_ringbuffer *)x; +} + +#define INIT_SIZE 0x1000 + static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; drm_private extern pthread_mutex_t table_lock; @@ -118,12 +140,15 @@ static void ring_cmd_del(struct msm_cmd *cmd) { if (cmd->ring_bo) ring_bo_del(cmd->ring->pipe->dev, cmd->ring_bo); + list_del(&cmd->list); + to_msm_ringbuffer(cmd->ring)->cmd_count--; free(cmd->relocs); free(cmd); } static struct msm_cmd * ring_cmd_new(struct fd_ringbuffer *ring, uint32_t size) { + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); struct msm_cmd *cmd = calloc(1, sizeof(*cmd)); if (!cmd) @@ -134,6 +159,9 @@ static struct msm_cmd * ring_cmd_new(struct fd_ringbuffer *ring, uint32_t size) if (!cmd->ring_bo) goto fail; + list_addtail(&cmd->list, &msm_ring->cmd_list); + msm_ring->cmd_count++; + return cmd; fail: @@ -158,9 +186,11 @@ static void *grow(void *ptr, uint32_t nr, uint32_t *max, uint32_t sz) (x)->nr_ ## name ++; \ }) -static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x) +static struct 
msm_cmd *current_cmd(struct fd_ringbuffer *ring) { - return (struct msm_ringbuffer *)x; + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + assert(!LIST_IS_EMPTY(&msm_ring->cmd_list)); + return LIST_LAST_ENTRY(&msm_ring->cmd_list, struct msm_cmd, list); } static uint32_t append_bo(struct fd_ringbuffer *ring, struct fd_bo *bo) @@ -248,12 +278,13 @@ static void get_cmd(struct fd_ringbuffer *ring, struct msm_cmd *target_cmd, cmd->submit_offset = submit_offset; cmd->size = size; cmd->pad = 0; + + target_cmd->size = size; } static void * msm_ringbuffer_hostptr(struct fd_ringbuffer *ring) { - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - return fd_bo_map(msm_ring->cmd->ring_bo); + return fd_bo_map(current_cmd(ring)->ring_bo); } static uint32_t find_next_reloc_idx(struct msm_cmd *msm_cmd, @@ -271,6 +302,15 @@ static uint32_t find_next_reloc_idx(struct msm_cmd *msm_cmd, return i; } +static void delete_cmds(struct msm_ringbuffer *msm_ring) +{ + struct msm_cmd *cmd, *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(cmd, tmp, &msm_ring->cmd_list, list) { + ring_cmd_del(cmd); + } +} + static void flush_reset(struct fd_ringbuffer *ring) { struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); @@ -288,11 +328,36 @@ static void flush_reset(struct fd_ringbuffer *ring) target_cmd->nr_relocs = 0; } - msm_ring->cmd->nr_relocs = 0; msm_ring->submit.nr_cmds = 0; msm_ring->submit.nr_bos = 0; msm_ring->nr_cmds = 0; msm_ring->nr_bos = 0; + + if (msm_ring->is_growable) { + delete_cmds(msm_ring); + } else { + /* in old mode, just reset the # of relocs: */ + current_cmd(ring)->nr_relocs = 0; + } +} + +static void finalize_current_cmd(struct fd_ringbuffer *ring, uint32_t *last_start) +{ + uint32_t submit_offset, size, type; + struct fd_ringbuffer *parent; + + if (ring->parent) { + parent = ring->parent; + type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + } else { + parent = ring; + type = MSM_SUBMIT_CMD_BUF; + } + + submit_offset = offset_bytes(last_start, ring->start); + size = 
offset_bytes(ring->cur, last_start); + + get_cmd(parent, current_cmd(ring), submit_offset, size, type); } static void dump_submit(struct msm_ringbuffer *msm_ring) @@ -323,13 +388,10 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start struct drm_msm_gem_submit req = { .pipe = to_msm_pipe(ring->pipe)->pipe, }; - uint32_t i, submit_offset, size; + uint32_t i; int ret; - submit_offset = offset_bytes(last_start, ring->start); - size = offset_bytes(ring->cur, last_start); - - get_cmd(ring, msm_ring->cmd, submit_offset, size, MSM_SUBMIT_CMD_BUF); + finalize_current_cmd(ring, last_start); /* needs to be after get_cmd() as that could create bos/cmds table: */ req.bos = VOID2U64(msm_ring->submit.bos), @@ -367,6 +429,13 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start return ret; } +static void msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) +{ + assert(to_msm_ringbuffer(ring)->is_growable); + finalize_current_cmd(ring, ring->last_start); + ring_cmd_new(ring, size); +} + static void msm_ringbuffer_reset(struct fd_ringbuffer *ring) { flush_reset(ring); @@ -375,14 +444,14 @@ static void msm_ringbuffer_reset(struct fd_ringbuffer *ring) static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *r) { - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); struct fd_ringbuffer *parent = ring->parent ? 
ring->parent : ring; struct msm_bo *msm_bo = to_msm_bo(r->bo); struct drm_msm_gem_submit_reloc *reloc; - uint32_t idx = APPEND(msm_ring->cmd, relocs); + struct msm_cmd *cmd = current_cmd(ring); + uint32_t idx = APPEND(cmd, relocs); uint32_t addr; - reloc = &msm_ring->cmd->relocs[idx]; + reloc = &cmd->relocs[idx]; reloc->reloc_idx = bo2idx(parent, r->bo, r->flags); reloc->reloc_offset = r->offset; @@ -398,26 +467,53 @@ static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, (*ring->cur++) = addr | r->or; } -static void msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, +static uint32_t msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx, uint32_t submit_offset, uint32_t size) { - struct msm_cmd *cmd = to_msm_ringbuffer(target)->cmd; + struct msm_cmd *cmd = NULL; + uint32_t idx = 0; + + LIST_FOR_EACH_ENTRY(cmd, &to_msm_ringbuffer(target)->cmd_list, list) { + if (idx == cmd_idx) + break; + idx++; + } - get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF); + assert(cmd && (idx == cmd_idx)); + + if (idx < (to_msm_ringbuffer(target)->cmd_count - 1)) { + /* All but the last cmd buffer is fully "baked" (ie. already has + * done get_cmd() to add it to the cmds table). 
But in this case, + * the size we get is invalid (since it is calculated from the + * last cmd buffer): + */ + size = cmd->size; + } else { + get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF); + } msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ .bo = cmd->ring_bo, .flags = FD_RELOC_READ, .offset = submit_offset, }); + + return size; +} + +static uint32_t msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) +{ + return to_msm_ringbuffer(ring)->cmd_count; } static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring) { struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - if (msm_ring->cmd) - ring_cmd_del(msm_ring->cmd); + + flush_reset(ring); + delete_cmds(msm_ring); + free(msm_ring->submit.cmds); free(msm_ring->submit.bos); free(msm_ring->bos); @@ -428,9 +524,11 @@ static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring) static const struct fd_ringbuffer_funcs funcs = { .hostptr = msm_ringbuffer_hostptr, .flush = msm_ringbuffer_flush, + .grow = msm_ringbuffer_grow, .reset = msm_ringbuffer_reset, .emit_reloc = msm_ringbuffer_emit_reloc, .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, + .cmd_count = msm_ringbuffer_cmd_count, .destroy = msm_ringbuffer_destroy, }; @@ -446,15 +544,20 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe, goto fail; } + if (size == 0) { + assert(pipe->dev->version >= FD_VERSION_UNLIMITED_CMDS); + size = INIT_SIZE; + msm_ring->is_growable = TRUE; + } + + list_inithead(&msm_ring->cmd_list); + ring = &msm_ring->base; ring->funcs = &funcs; + ring->size = size; ring->pipe = pipe; /* needed in ring_cmd_new() */ - msm_ring->cmd = ring_cmd_new(ring, size); - if (!msm_ring->cmd) { - ERROR_MSG("command buffer allocation failed"); - goto fail; - } + ring_cmd_new(ring, size); return ring; fail: -- 2.7.4