simple_mtx_unlock(&table_lock);
}
+/**
+ * Clean up fences, dropping pipe references. If 'expired' is true, only
+ * clean up fences that have already expired.
+ *
+ * Normally we expect at most a single fence, the exception being bos
+ * shared between contexts.
+ */
+static void
+cleanup_fences(struct fd_bo *bo, bool expired)
+{
+ simple_mtx_assert_locked(&table_lock);
+
+ for (int i = 0; i < bo->nr_fences; i++) {
+ struct fd_bo_fence *f = &bo->fences[i];
+
+ if (expired && fd_fence_before(f->pipe->control->fence, f->fence))
+ continue;
+
+ fd_pipe_del_locked(f->pipe);
+ bo->nr_fences--;
+
+ if (bo->nr_fences > 0) {
+ /* Shuffle up the last entry to replace the current slot: */
+ bo->fences[i] = bo->fences[bo->nr_fences];
+ i--;
+ }
+ }
+}
+
/* Called under table_lock */
void
bo_del(struct fd_bo *bo)
simple_mtx_assert_locked(&table_lock);
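+ /* Drop any remaining per-pipe fence references before freeing: */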
+ cleanup_fences(bo, false);
+ free(bo->fences);
+
if (bo->map)
os_munmap(bo->map, bo->size);
set_name(bo, req.name);
simple_mtx_unlock(&table_lock);
bo->bo_reuse = NO_CACHE;
+ bo->shared = true;
}
*name = bo->name;
fd_bo_handle(struct fd_bo *bo)
{
bo->bo_reuse = NO_CACHE;
+ bo->shared = true;
return bo->handle;
}
}
bo->bo_reuse = NO_CACHE;
+ bo->shared = true;
return prime_fd;
}
int
fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
{
+ if (op & FD_BO_PREP_NOSYNC) {
+ simple_mtx_lock(&table_lock);
+ enum fd_bo_state state = fd_bo_state(bo);
+ simple_mtx_unlock(&table_lock);
+
+ switch (state) {
+ case FD_BO_STATE_IDLE:
+ return 0;
+ case FD_BO_STATE_BUSY:
+ return -EBUSY;
+ case FD_BO_STATE_UNKNOWN:
+ break;
+ }
+ }
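+ /* Otherwise defer to the backend's cpu_prep(): */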
return bo->funcs->cpu_prep(bo, pipe, op);
}
void
fd_bo_cpu_fini(struct fd_bo *bo)
{
- bo->funcs->cpu_fini(bo);
+// TODO until we have cached buffers, the kernel side ioctl does nothing,
+// so just skip it. When we have cached buffers, we can make the
+// ioctl conditional
+// bo->funcs->cpu_fini(bo);
}
+
+void
+fd_bo_add_fence(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t fence)
+{
+ simple_mtx_assert_locked(&table_lock);
+
+ if (bo->nosync)
+ return;
+
+ /* The common case is the bo being re-used on the same pipe it was
+ * previously used on:
+ */
+ for (int i = 0; i < bo->nr_fences; i++) {
+ struct fd_bo_fence *f = &bo->fences[i];
+ if (f->pipe == pipe) {
+ assert(fd_fence_before(f->fence, fence));
+ f->fence = fence;
+ return;
+ }
+ }
+
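+ /* Prune expired fences before appending a new (pipe, fence) entry: */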
+ cleanup_fences(bo, true);
+
+ APPEND(bo, fences, (struct fd_bo_fence){
+ .pipe = fd_pipe_ref_locked(pipe),
+ .fence = fence,
+ });
+}
+
+enum fd_bo_state
+fd_bo_state(struct fd_bo *bo)
+{
+ simple_mtx_assert_locked(&table_lock);
+
+ cleanup_fences(bo, true);
+
+ if (bo->shared || bo->nosync)
+ return FD_BO_STATE_UNKNOWN;
+
+ if (!bo->nr_fences)
+ return FD_BO_STATE_IDLE;
+
+ return FD_BO_STATE_BUSY;
+}
+
return NULL;
}
-static int
-is_idle(struct fd_bo *bo)
-{
- return fd_bo_cpu_prep(bo, NULL,
- FD_BO_PREP_READ | FD_BO_PREP_WRITE |
- FD_BO_PREP_NOSYNC) == 0;
-}
-
static struct fd_bo *
find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags)
{
if (!list_is_empty(&bucket->list)) {
bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list);
/* TODO check for compatible flags? */
- if (is_idle(bo)) {
+ if (fd_bo_state(bo) == FD_BO_STATE_IDLE) {
list_del(&bo->list);
} else {
bo = NULL;
FD_GLOBAL_FAULTS, /* # of global (all context) faults */
};
+/**
+ * Helper for fence/seqno comparisons which deals properly with rollover.
+ * Returns true if fence 'a' is before fence 'b'.
+ */
+static inline bool
+fd_fence_before(uint32_t a, uint32_t b)
+{
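+ /* Unsigned subtraction + signed compare handles seqno wrap-around: */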
+ return (int32_t)(a - b) < 0;
+}
+
+static inline bool
+fd_fence_after(uint32_t a, uint32_t b)
+{
+ return (int32_t)(a - b) > 0;
+}
+
/* bo flags: */
#define FD_BO_GPUREADONLY BITSET_BIT(1)
#define FD_BO_SCANOUT BITSET_BIT(2)
struct fd_pipe *fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id,
uint32_t prio);
struct fd_pipe *fd_pipe_ref(struct fd_pipe *pipe);
+struct fd_pipe *fd_pipe_ref_locked(struct fd_pipe *pipe);
void fd_pipe_del(struct fd_pipe *pipe);
int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param,
uint64_t *value);
fd_pipe_get_param(pipe, FD_GPU_ID, &val);
pipe->gpu_id = val;
+ pipe->control_mem = fd_bo_new(dev, sizeof(*pipe->control),
+ 0, "pipe-control");
+ pipe->control = fd_bo_map(pipe->control_mem);
+
+ /* We don't want the control_mem bo to hold a reference back to the pipe,
+ * so disable userspace fencing. This also means that we won't be able
+ * to determine if the buffer is idle, which is needed by the bo-cache. But
+ * pipe creation/destroy is not a high frequency event so just disable
+ * the bo-cache as well:
+ */
+ pipe->control_mem->nosync = true;
+ pipe->control_mem->bo_reuse = NO_CACHE;
+
return pipe;
}
struct fd_pipe *
fd_pipe_ref(struct fd_pipe *pipe)
{
- p_atomic_inc(&pipe->refcnt);
+ simple_mtx_lock(&table_lock);
+ fd_pipe_ref_locked(pipe);
+ simple_mtx_unlock(&table_lock);
+ return pipe;
+}
+
+struct fd_pipe *
+fd_pipe_ref_locked(struct fd_pipe *pipe)
+{
+ simple_mtx_assert_locked(&table_lock);
+ pipe->refcnt++;
return pipe;
}
void
fd_pipe_del(struct fd_pipe *pipe)
{
- if (!p_atomic_dec_zero(&pipe->refcnt))
- return;
- pipe->funcs->destroy(pipe);
+ simple_mtx_lock(&table_lock);
+ fd_pipe_del_locked(pipe);
+ simple_mtx_unlock(&table_lock);
}
void
simple_mtx_assert_locked(&table_lock);
if (!p_atomic_dec_zero(&pipe->refcnt))
return;
+ fd_bo_del_locked(pipe->control_mem);
pipe->funcs->destroy(pipe);
}
int
fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout)
{
return pipe->funcs->wait(pipe, timestamp, timeout);
}
+
+uint32_t
+fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring)
+{
+ uint32_t fence = ++pipe->last_fence;
+
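+ /* a5xx and later use type-7 (PKT7) packets; earlier gens use type-3 (PKT3): */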
+ if (pipe->gpu_id >= 500) {
+ OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
+ OUT_RELOC(ring, control_ptr(pipe, fence)); /* ADDR_LO/HI */
+ OUT_RING(ring, fence);
+ } else {
+ OUT_PKT3(ring, CP_EVENT_WRITE, 3);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
+ OUT_RELOC(ring, control_ptr(pipe, fence)); /* ADDR */
+ OUT_RING(ring, fence);
+ }
+
+ return fence;
+}
void (*destroy)(struct fd_pipe *pipe);
};
+struct fd_pipe_control {
+ uint32_t fence;
+};
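+
+/* Expands to the bo/offset/or/shift args expected by OUT_RELOC(): */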
+#define control_ptr(pipe, member) \
+ (pipe)->control_mem, offsetof(struct fd_pipe_control, member), 0, 0
+
struct fd_pipe {
struct fd_device *dev;
enum fd_pipe_id id;
uint32_t gpu_id;
+
+ /**
+ * Note refcnt is *not* atomic, but protected by table_lock, since the
+ * table_lock is already held in fd_bo_add_fence(), which is the hot path.
+ */
int32_t refcnt;
+
+ /**
+ * Previous fence seqno allocated for this pipe. The fd_pipe represents
+ * a single timeline; fences allocated by this pipe can be compared to
+ * each other, but fences from different pipes are not comparable (as
+ * there could be preemption of multiple priority level submitqueues at
+ * play)
+ */
+ uint32_t last_fence;
+ struct fd_bo *control_mem;
+ volatile struct fd_pipe_control *control;
+
const struct fd_pipe_funcs *funcs;
};
+uint32_t fd_pipe_emit_fence(struct fd_pipe *pipe, struct fd_ringbuffer *ring);
+
struct fd_submit_funcs {
struct fd_ringbuffer *(*new_ringbuffer)(struct fd_submit *submit,
uint32_t size,
const struct fd_submit_funcs *funcs;
struct fd_ringbuffer *primary;
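+
+ /* Fence seqno assigned to this submit at flush time: */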
+ uint32_t fence;
};
struct fd_bo_funcs {
void (*destroy)(struct fd_bo *bo);
};
+struct fd_bo_fence {
+ /* For non-shared buffers, track the last pipe the buffer was active
+ * on, and the per-pipe fence value that indicates when the buffer is
+ * idle:
+ */
+ uint32_t fence;
+ struct fd_pipe *pipe;
+};
+
struct fd_bo {
struct fd_device *dev;
uint32_t size;
NO_CACHE = 0,
BO_CACHE = 1,
RING_CACHE = 2,
- } bo_reuse;
+ } bo_reuse : 2;
+
+ /* Buffers that are shared (imported or exported) may be used in
+ * other processes, so we need to fall back to the kernel to determine
+ * busyness.
+ */
+ bool shared : 1;
+
+ /* We need to be able to disable userspace fence synchronization for
+ * special internal buffers, namely the pipe->control buffer, to avoid
+ * a circular reference loop.
+ */
+ bool nosync : 1;
struct list_head list; /* bucket-list entry */
time_t free_time; /* time when added to bucket-list */
+
+ DECLARE_ARRAY(struct fd_bo_fence, fences);
+};
+
+void fd_bo_add_fence(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t fence);
+
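+/**
+ * Known busyness of a bo, as tracked via userspace fences:
+ */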
+enum fd_bo_state {
+ FD_BO_STATE_IDLE,
+ FD_BO_STATE_BUSY,
+ FD_BO_STATE_UNKNOWN,
};
+enum fd_bo_state fd_bo_state(struct fd_bo *bo);
struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);
fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd,
uint32_t *out_fence)
{
- debug_assert(submit->primary);
+ submit->fence = fd_pipe_emit_fence(submit->pipe, submit->primary);
return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence);
}
}
}
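+ /* Record the submit fence on each referenced buffer so userspace can
+ * later check busyness without a kernel round-trip:
+ */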
+ simple_mtx_lock(&table_lock);
+ for (unsigned j = 0; j < msm_submit->nr_bos; j++) {
+ fd_bo_add_fence(msm_submit->bos[j], submit->pipe, submit->fence);
+ }
+ simple_mtx_unlock(&table_lock);
+
if (in_fence_fd != -1) {
req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
req.fence_fd = in_fence_fd;
} else {
submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0]));
}
+
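+ /* Record the submit fence on each bo while building the kernel bo list: */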
+ simple_mtx_lock(&table_lock);
for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
submit_bos[i].flags = msm_submit->bos[i]->flags;
submit_bos[i].handle = msm_submit->bos[i]->handle;
submit_bos[i].presumed = 0;
+ fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence);
}
+ simple_mtx_unlock(&table_lock);
+
req.bos = VOID2U64(submit_bos), req.nr_bos = msm_submit->nr_bos;
req.cmds = VOID2U64(cmds), req.nr_cmds = primary->u.nr_cmds;