simple_mtx_unlock(&table_lock);
}
+/**
+ * Clean up fences, dropping pipe references. If 'expired' is true, only
+ * clean up fences that have already expired.
+ *
+ * Normally we expect at most a single fence, the exception being bos
+ * shared between contexts.
+ */
+static void
+cleanup_fences(struct fd_bo *bo, bool expired)
+{
+ simple_mtx_assert_locked(&table_lock);
+
+ for (int i = 0; i < bo->nr_fences; i++) {
+ struct fd_bo_fence *f = &bo->fences[i];
+
+ if (expired && fd_fence_before(f->pipe->control->fence, f->fence))
+ continue;
+
+ fd_pipe_del_locked(f->pipe);
+ bo->nr_fences--;
+
+ if (bo->nr_fences > 0) {
+ /* Shuffle up the last entry to replace the current slot: */
+ bo->fences[i] = bo->fences[bo->nr_fences];
+ i--;
+ }
+ }
+}
+
/* Called under table_lock */
void
bo_del(struct fd_bo *bo)
simple_mtx_assert_locked(&table_lock);
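+ /* Drop any remaining per-pipe fence references before freeing: */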
+ cleanup_fences(bo, false);
+ free(bo->fences);
+
if (bo->map)
os_munmap(bo->map, bo->size);
set_name(bo, req.name);
simple_mtx_unlock(&table_lock);
bo->bo_reuse = NO_CACHE;
+ bo->shared = true;
}
*name = bo->name;
fd_bo_handle(struct fd_bo *bo)
{
bo->bo_reuse = NO_CACHE;
+ bo->shared = true;
return bo->handle;
}
}
bo->bo_reuse = NO_CACHE;
+ bo->shared = true;
return prime_fd;
}
int
fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
{
+ if (op & FD_BO_PREP_NOSYNC) {
+ simple_mtx_lock(&table_lock);
+ enum fd_bo_state state = fd_bo_state(bo);
+ simple_mtx_unlock(&table_lock);
+
+ switch (state) {
+ case FD_BO_STATE_IDLE:
+ return 0;
+ case FD_BO_STATE_BUSY:
+ return -EBUSY;
+ case FD_BO_STATE_UNKNOWN:
+ break;
+ }
+ }
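+ /* Otherwise defer to the backend's cpu_prep(): */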
return bo->funcs->cpu_prep(bo, pipe, op);
}
void
fd_bo_cpu_fini(struct fd_bo *bo)
{
- bo->funcs->cpu_fini(bo);
+// TODO until we have cached buffers, the kernel side ioctl does nothing,
+// so just skip it. When we have cached buffers, we can make the
+// ioctl conditional
+// bo->funcs->cpu_fini(bo);
}
+
+void
+fd_bo_add_fence(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t fence)
+{
+ simple_mtx_assert_locked(&table_lock);
+
+ if (bo->nosync)
+ return;
+
+ /* The common case is the bo being re-used on the same pipe it was
+ * previously used on:
+ */
+ for (int i = 0; i < bo->nr_fences; i++) {
+ struct fd_bo_fence *f = &bo->fences[i];
+ if (f->pipe == pipe) {
+ assert(fd_fence_before(f->fence, fence));
+ f->fence = fence;
+ return;
+ }
+ }
+
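+ /* Prune expired fences before appending a new (pipe, fence) entry: */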
+ cleanup_fences(bo, true);
+
+ APPEND(bo, fences, (struct fd_bo_fence){
+ .pipe = fd_pipe_ref_locked(pipe),
+ .fence = fence,
+ });
+}
+
+enum fd_bo_state
+fd_bo_state(struct fd_bo *bo)
+{
+ simple_mtx_assert_locked(&table_lock);
+
+ cleanup_fences(bo, true);
+
+ if (bo->shared || bo->nosync)
+ return FD_BO_STATE_UNKNOWN;
+
+ if (!bo->nr_fences)
+ return FD_BO_STATE_IDLE;
+
+ return FD_BO_STATE_BUSY;
+}
+
return NULL;
}
-static int
-is_idle(struct fd_bo *bo)
-{
- return fd_bo_cpu_prep(bo, NULL,
- FD_BO_PREP_READ | FD_BO_PREP_WRITE |
- FD_BO_PREP_NOSYNC) == 0;
-}
-
static struct fd_bo *
find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags)
{
if (!list_is_empty(&bucket->list)) {
bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list);
/* TODO check for compatible flags? */
- if (is_idle(bo)) {
+ if (fd_bo_state(bo) == FD_BO_STATE_IDLE) {
list_del(&bo->list);
} else {
bo = NULL;
FD_GLOBAL_FAULTS, /* # of global (all context) faults */
};
+/**
+ * Helper for fence/seqno comparisons which deals properly with rollover.
+ * Returns true if fence 'a' is before fence 'b'.
+ */
+static inline bool
+fd_fence_before(uint32_t a, uint32_t b)
+{
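+ /* Unsigned subtraction + signed compare handles seqno wrap-around: */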
+ return (int32_t)(a - b) < 0;
+}
+
+static inline bool
+fd_fence_after(uint32_t a, uint32_t b)
+{
+ return (int32_t)(a - b) > 0;
+}
+
/* bo flags: */
#define FD_BO_GPUREADONLY BITSET_BIT(1)
#define FD_BO_SCANOUT BITSET_BIT(2)
struct fd_pipe *fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id,
uint32_t prio);
struct fd_pipe *fd_pipe_ref(struct fd_pipe *pipe);
+struct fd_pipe *fd_pipe_ref_locked(struct fd_pipe *pipe);
void fd_pipe_del(struct fd_pipe *pipe);
int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value);
fd_pipe_get_param(pipe, FD_GPU_ID, &val);
pipe->gpu_id = val;
+ pipe->control_mem = fd_bo_new(dev, sizeof(*pipe->control),
+ 0, "pipe-control");
+ pipe->control = fd_bo_map(pipe->control_mem);
+
+ /* We don't want the control_mem bo to hold a reference back to the pipe,
+ * so disable userspace fencing. This also means that we won't be able
+ * to determine if the buffer is idle, which is needed by the bo-cache. But
+ * pipe creation/destroy is not a high frequency event so just disable
+ * the bo-cache as well:
+ */
+ pipe->control_mem->nosync = true;
+ pipe->control_mem->bo_reuse = NO_CACHE;
+
return pipe;
}
struct fd_pipe *
fd_pipe_ref(struct fd_pipe *pipe)
{
- p_atomic_inc(&pipe->refcnt);
+ simple_mtx_lock(&table_lock);
+ fd_pipe_ref_locked(pipe);
+ simple_mtx_unlock(&table_lock);
+ return pipe;
+}
+
+struct fd_pipe *
+fd_pipe_ref_locked(struct fd_pipe *pipe)
+{
+ simple_mtx_assert_locked(&table_lock);
+ pipe->refcnt++;
return pipe;
}
void
fd_pipe_del(struct fd_pipe *pipe)
{
- if (!p_atomic_dec_zero(&pipe->refcnt))
- return;
- pipe->funcs->destroy(pipe);
+ simple_mtx_lock(&table_lock);
+ fd_pipe_del_locked(pipe);
+ simple_mtx_unlock(&table_lock);
}
void
simple_mtx_assert_locked(&table_lock);
if (!p_atomic_dec_zero(&pipe->refcnt))
return;
+ fd_bo_del_locked(pipe->control_mem);
pipe->funcs->destroy(pipe);
}
int
fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout)
{
return pipe->funcs->wait(pipe, timestamp, timeout);
}
+
+uint32_t
+fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring)
+{
+ uint32_t fence = ++pipe->last_fence;
+
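+ /* a5xx and later use type-7 (PKT7) packets; earlier gens use type-3 (PKT3): */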
+ if (pipe->gpu_id >= 500) {
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
+ OUT_RELOC(ring, control_ptr(pipe, fence)); /* ADDR_LO/HI */
+ OUT_RING(ring, fence);
+ } else {
+ OUT_PKT3(ring, CP_EVENT_WRITE, 3);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
+ OUT_RELOC(ring, control_ptr(pipe, fence)); /* ADDR */
+ OUT_RING(ring, fence);
+ }
+
+ return fence;
+}
void (*destroy)(struct fd_pipe *pipe);
};
+struct fd_pipe_control {
+ uint32_t fence;
+};
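+
+/* Expands to the bo/offset/or/shift args expected by OUT_RELOC(): */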
+#define control_ptr(pipe, member) \
+ (pipe)->control_mem, offsetof(struct fd_pipe_control, member), 0, 0
+
struct fd_pipe {
struct fd_device *dev;
enum fd_pipe_id id;
uint32_t gpu_id;
+
+ /**
+ * Note refcnt is *not* atomic, but protected by table_lock, since the
+ * table_lock is already held in fd_bo_add_fence(), which is the hot path.
+ */
int32_t refcnt;
+
+ /**
+ * Previous fence seqno allocated for this pipe. The fd_pipe represents
+ * a single timeline; fences allocated by this pipe can be compared to
+ * each other, but fences from different pipes are not comparable (as
+ * there could be preemption of multiple priority level submitqueues at
+ * play)
+ */
+ uint32_t last_fence;
+ struct fd_bo *control_mem;
+ volatile struct fd_pipe_control *control;
+
const struct fd_pipe_funcs *funcs;
};
+uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring);
+
struct fd_submit_funcs {
struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit,
uint32_t size,
const struct fd_submit_funcs *funcs;
struct fd_ringbuffer *primary;
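+
+ /* Fence seqno assigned to this submit at flush time: */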
+ uint32_t fence;
};
struct fd_bo_funcs {
void (*destroy)(struct fd_bo *bo);
};
+struct fd_bo_fence {
+ /* For non-shared buffers, track the last pipe the buffer was active
+ * on, and the per-pipe fence value that indicates when the buffer is
+ * idle:
+ */
+ uint32_t fence;
+ struct fd_pipe *pipe;
+};
+
struct fd_bo {
struct fd_device *dev;
uint32_t size;
NO_CACHE = 0,
BO_CACHE = 1,
RING_CACHE = 2,
- } bo_reuse;
+ } bo_reuse : 2;
+
+ /* Buffers that are shared (imported or exported) may be used in
+ * other processes, so we need to fall back to the kernel to determine
+ * busyness.
+ */
+ bool shared : 1;
+
+ /* We need to be able to disable userspace fence synchronization for
+ * special internal buffers, namely the pipe->control buffer, to avoid
+ * a circular reference loop.
+ */
+ bool nosync : 1;
struct list_head list; /* bucket-list entry */
time_t free_time; /* time when added to bucket-list */
+
+ DECLARE_ARRAY(struct fd_bo_fence, fences);
+};
+
+void fd_bo_add_fence(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t fence);
+
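+/**
+ * Known busyness of a bo, as tracked via userspace fences:
+ */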
+enum fd_bo_state {
+ FD_BO_STATE_IDLE,
+ FD_BO_STATE_BUSY,
+ FD_BO_STATE_UNKNOWN,
};
+enum fd_bo_state fd_bo_state(struct fd_bo *bo);
struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);
fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd,
uint32_t *out_fence)
{
- debug_assert(submit->primary);
+ submit->fence = fd_pipe_emit_fence(submit->pipe, submit->primary);
return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence);
}
}
}
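+ /* Record the submit fence on each referenced buffer so userspace can
+ * later check busyness without a kernel round-trip:
+ */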
+ simple_mtx_lock(&table_lock);
+ for (unsigned j = 0; j < msm_submit->nr_bos; j++) {
+ fd_bo_add_fence(msm_submit->bos[j], submit->pipe, submit->fence);
+ }
+ simple_mtx_unlock(&table_lock);
+
if (in_fence_fd != -1) {
req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
req.fence_fd = in_fence_fd;
} else {
submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0]));
}
+
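+ /* Record the submit fence on each bo while building the kernel bo list: */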
+ simple_mtx_lock(&table_lock);
for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
submit_bos[i].flags = msm_submit->bos[i]->flags;
submit_bos[i].handle = msm_submit->bos[i]->handle;
submit_bos[i].presumed = 0;
+ fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence);
}
+ simple_mtx_unlock(&table_lock);
+
req.bos = VOID2U64(submit_bos), req.nr_bos = msm_submit->nr_bos;
req.cmds = VOID2U64(cmds), req.nr_cmds = primary->u.nr_cmds;