freedreno: add bo cache
author     Rob Clark <robclark@freedesktop.org>
           Fri, 13 Dec 2013 17:48:30 +0000 (12:48 -0500)
committer  Rob Clark <robclark@freedesktop.org>
           Fri, 13 Dec 2013 20:48:10 +0000 (15:48 -0500)
Workloads that create many transient buffers incur significant CPU
overhead in buffer allocation, zeroing, cache maintenance, and mmap
setup.  By caching and re-using existing buffers, the CPU overhead
drops significantly.  See:

http://bloggingthemonkey.blogspot.com/2013/09/freedreno-update-moar-fps.html

A simple time-based policy is used for purging the cache.  Once the
kernel supports it, we could use a madvise-style API to handle
memory-pressure scenarios a bit better.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
freedreno/freedreno_bo.c
freedreno/freedreno_device.c
freedreno/freedreno_priv.h
freedreno/kgsl/kgsl_bo.c

freedreno/freedreno_bo.c
index 92c7dd7..8cea4de 100644
@@ -31,6 +31,8 @@
 
 static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
 
+static void bo_del(struct fd_bo *bo);
+
 /* set buffer name, and add to table, call w/ table_lock held: */
 static void set_name(struct fd_bo *bo, uint32_t name)
 {
@@ -68,24 +70,128 @@ static struct fd_bo * bo_from_handle(struct fd_device *dev,
        bo->size = size;
        bo->handle = handle;
        atomic_set(&bo->refcnt, 1);
+       list_inithead(&bo->list);
        /* add ourself into the handle table: */
        drmHashInsert(dev->handle_table, handle, bo);
        return bo;
 }
 
+/* Frees older cached buffers.  Called under table_lock */
+void fd_cleanup_bo_cache(struct fd_device *dev, time_t time)
+{
+       int i;
+
+       if (dev->time == time)
+               return;
+
+       for (i = 0; i < dev->num_buckets; i++) {
+               struct fd_bo_bucket *bucket = &dev->cache_bucket[i];
+               struct fd_bo *bo;
+
+               while (!LIST_IS_EMPTY(&bucket->list)) {
+                       bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list);
+
+                       /* keep things in cache for at least 1 second: */
+                       if (time && ((time - bo->free_time) <= 1))
+                               break;
+
+                       list_del(&bo->list);
+                       bo_del(bo);
+               }
+       }
+
+       dev->time = time;
+}
+
+static struct fd_bo_bucket * get_bucket(struct fd_device *dev, uint32_t size)
+{
+       int i;
+
+       /* hmm, this is what intel does, but I suppose we could calculate our
+        * way to the correct bucket size rather than looping..
+        */
+       for (i = 0; i < dev->num_buckets; i++) {
+               struct fd_bo_bucket *bucket = &dev->cache_bucket[i];
+               if (bucket->size >= size) {
+                       return bucket;
+               }
+       }
+
+       return NULL;
+}
+
+static int is_idle(struct fd_bo *bo)
+{
+       return fd_bo_cpu_prep(bo, NULL,
+                       DRM_FREEDRENO_PREP_READ |
+                       DRM_FREEDRENO_PREP_WRITE |
+                       DRM_FREEDRENO_PREP_NOSYNC) == 0;
+}
+
+static struct fd_bo *find_in_bucket(struct fd_device *dev,
+               struct fd_bo_bucket *bucket, uint32_t flags)
+{
+       struct fd_bo *bo = NULL;
+
+       /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could
+        * skip the busy check.. if it is only going to be a render target
+        * then we probably don't need to stall..
+        *
+        * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail
+        * (MRU, since likely to be in GPU cache), rather than head (LRU)..
+        */
+       pthread_mutex_lock(&table_lock);
+       while (!LIST_IS_EMPTY(&bucket->list)) {
+               bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list);
+               if (0 /* TODO: if madvise tells us bo is gone... */) {
+                       list_del(&bo->list);
+                       bo_del(bo);
+                       bo = NULL;
+                       continue;
+               }
+               /* TODO check for compatible flags? */
+               if (is_idle(bo)) {
+                       list_del(&bo->list);
+                       break;
+               }
+               bo = NULL;
+               break;
+       }
+       pthread_mutex_unlock(&table_lock);
+
+       return bo;
+}
+
+
 struct fd_bo * fd_bo_new(struct fd_device *dev,
                uint32_t size, uint32_t flags)
 {
        struct fd_bo *bo = NULL;
+       struct fd_bo_bucket *bucket;
        uint32_t handle;
        int ret;
 
-       ret = dev->funcs->bo_new_handle(dev, ALIGN(size, 4096), flags, &handle);
+       size = ALIGN(size, 4096);
+       bucket = get_bucket(dev, size);
+
+       /* see if we can be green and recycle: */
+       if (bucket) {
+               size = bucket->size;
+               bo = find_in_bucket(dev, bucket, flags);
+               if (bo) {
+                       atomic_set(&bo->refcnt, 1);
+                       fd_device_ref(bo->dev);
+                       return bo;
+               }
+       }
+
+       ret = dev->funcs->bo_new_handle(dev, size, flags, &handle);
        if (ret)
                return NULL;
 
        pthread_mutex_lock(&table_lock);
        bo = bo_from_handle(dev, size, handle);
+       bo->bo_reuse = 1;
        pthread_mutex_unlock(&table_lock);
 
        return bo;
@@ -144,30 +250,61 @@ struct fd_bo * fd_bo_ref(struct fd_bo *bo)
 
 void fd_bo_del(struct fd_bo *bo)
 {
-       struct fd_device *dev;
+       struct fd_device *dev = bo->dev;
 
        if (!atomic_dec_and_test(&bo->refcnt))
                return;
 
+       pthread_mutex_lock(&table_lock);
+
+       if (bo->bo_reuse) {
+               struct fd_bo_bucket *bucket = get_bucket(dev, bo->size);
+
+               /* see if we can be green and recycle: */
+               if (bucket) {
+                       struct timespec time;
+
+                       clock_gettime(CLOCK_MONOTONIC, &time);
+
+                       bo->free_time = time.tv_sec;
+                       list_addtail(&bo->list, &bucket->list);
+                       fd_cleanup_bo_cache(dev, time.tv_sec);
+
+                       /* bo's in the bucket cache don't have a ref and
+                        * don't hold a ref to the dev:
+                        */
+
+                       goto out;
+               }
+       }
+
+       bo_del(bo);
+out:
+       fd_device_del_locked(dev);
+       pthread_mutex_unlock(&table_lock);
+}
+
+/* Called under table_lock */
+static void bo_del(struct fd_bo *bo)
+{
        if (bo->map)
                munmap(bo->map, bo->size);
 
+       /* TODO probably bo's in bucket list get removed from
+        * handle table??
+        */
+
        if (bo->handle) {
                struct drm_gem_close req = {
                                .handle = bo->handle,
                };
-               pthread_mutex_lock(&table_lock);
                drmHashDelete(bo->dev->handle_table, bo->handle);
                if (bo->name)
                        drmHashDelete(bo->dev->name_table, bo->name);
                drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
-               pthread_mutex_unlock(&table_lock);
        }
 
-       dev = bo->dev;
        bo->funcs->destroy(bo);
-
-       fd_device_del(dev);
 }
 
 int fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
freedreno/freedreno_device.c
index 1e3d9df..6486983 100644
@@ -39,6 +39,44 @@ static void * dev_table;
 struct fd_device * kgsl_device_new(int fd);
 struct fd_device * msm_device_new(int fd);
 
+static void
+add_bucket(struct fd_device *dev, int size)
+{
+       unsigned int i = dev->num_buckets;
+
+       assert(i < ARRAY_SIZE(dev->cache_bucket));
+
+       list_inithead(&dev->cache_bucket[i].list);
+       dev->cache_bucket[i].size = size;
+       dev->num_buckets++;
+}
+
+static void
+init_cache_buckets(struct fd_device *dev)
+{
+       unsigned long size, cache_max_size = 64 * 1024 * 1024;
+
+       /* OK, so power of two buckets was too wasteful of memory.
+        * Give 3 other sizes between each power of two, to hopefully
+        * cover things accurately enough.  (The alternative is
+        * probably to just go for exact matching of sizes, and assume
+        * that for things like composited window resize the tiled
+        * width/height alignment and rounding of sizes to pages will
+        * get us useful cache hit rates anyway)
+        */
+       add_bucket(dev, 4096);
+       add_bucket(dev, 4096 * 2);
+       add_bucket(dev, 4096 * 3);
+
+       /* Initialize the linked lists for BO reuse cache. */
+       for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
+               add_bucket(dev, size);
+               add_bucket(dev, size + size * 1 / 4);
+               add_bucket(dev, size + size * 2 / 4);
+               add_bucket(dev, size + size * 3 / 4);
+       }
+}
+
 static struct fd_device * fd_device_new_impl(int fd)
 {
        struct fd_device *dev;
@@ -69,6 +107,7 @@ static struct fd_device * fd_device_new_impl(int fd)
        dev->fd = fd;
        dev->handle_table = drmHashCreate();
        dev->name_table = drmHashCreate();
+       init_cache_buckets(dev);
 
        return dev;
 }
@@ -102,14 +141,27 @@ struct fd_device * fd_device_ref(struct fd_device *dev)
        return dev;
 }
 
+static void fd_device_del_impl(struct fd_device *dev)
+{
+       fd_cleanup_bo_cache(dev, 0);
+       drmHashDestroy(dev->handle_table);
+       drmHashDestroy(dev->name_table);
+       drmHashDelete(dev_table, dev->fd);
+       dev->funcs->destroy(dev);
+}
+
+void fd_device_del_locked(struct fd_device *dev)
+{
+       if (!atomic_dec_and_test(&dev->refcnt))
+               return;
+       fd_device_del_impl(dev);
+}
+
 void fd_device_del(struct fd_device *dev)
 {
        if (!atomic_dec_and_test(&dev->refcnt))
                return;
        pthread_mutex_lock(&table_lock);
-       drmHashDestroy(dev->handle_table);
-       drmHashDestroy(dev->name_table);
-       drmHashDelete(dev_table, dev->fd);
+       fd_device_del_impl(dev);
        pthread_mutex_unlock(&table_lock);
-       dev->funcs->destroy(dev);
 }
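
As a worked example of the progression set up by init_cache_buckets()
above: 4 KiB, 8 KiB and 12 KiB up front, then four buckets per power of
two (16/20/24/28 KiB, 32/40/48/56 KiB, and so on), with the base size
doubling while it stays at or below the 64 MiB cache_max_size.  That is
55 buckets in total, which fits the cache_bucket[14 * 4] array.  A
standalone sketch (not part of the commit) that mirrors the loop and
simply prints the sizes:

#include <stdio.h>

int main(void)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;
	int n = 0;

	/* the three sizes added individually up front: */
	printf("bucket %2d: %lu\n", n++, 4096ul);
	printf("bucket %2d: %lu\n", n++, 4096ul * 2);
	printf("bucket %2d: %lu\n", n++, 4096ul * 3);

	/* then four buckets per power of two, as in the loop above: */
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		printf("bucket %2d: %lu\n", n++, size);
		printf("bucket %2d: %lu\n", n++, size + size * 1 / 4);
		printf("bucket %2d: %lu\n", n++, size + size * 2 / 4);
		printf("bucket %2d: %lu\n", n++, size + size * 3 / 4);
	}

	/* 3 + 13 * 4 == 55 buckets */
	return 0;
}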
freedreno/freedreno_priv.h
index 69256f5..061d807 100644
@@ -59,6 +59,11 @@ struct fd_device_funcs {
        void (*destroy)(struct fd_device *dev);
 };
 
+struct fd_bo_bucket {
+       uint32_t size;
+       struct list_head list;
+};
+
 struct fd_device {
        int fd;
        atomic_t refcnt;
@@ -75,8 +80,17 @@ struct fd_device {
        void *handle_table, *name_table;
 
        struct fd_device_funcs *funcs;
+
+       struct fd_bo_bucket cache_bucket[14 * 4];
+       int num_buckets;
+       time_t time;
 };
 
+void fd_cleanup_bo_cache(struct fd_device *dev, time_t time);
+
+/* for where @table_lock is already held: */
+void fd_device_del_locked(struct fd_device *dev);
+
 struct fd_pipe_funcs {
        struct fd_ringbuffer * (*ringbuffer_new)(struct fd_pipe *pipe, uint32_t size);
        int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value);
@@ -120,6 +134,10 @@ struct fd_bo {
        void *map;
        atomic_t refcnt;
        struct fd_bo_funcs *funcs;
+
+       int bo_reuse;
+       struct list_head list;   /* bucket-list entry */
+       time_t free_time;        /* time when added to bucket-list */
 };
 
 struct fd_bo *fd_bo_from_handle(struct fd_device *dev,
freedreno/kgsl/kgsl_bo.c
index 0d019cb..585851c 100644
@@ -80,9 +80,24 @@ static int kgsl_bo_offset(struct fd_bo *bo, uint64_t *offset)
 static int kgsl_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
 {
        uint32_t timestamp = kgsl_bo_get_timestamp(to_kgsl_bo(bo));
-       if (timestamp) {
-               fd_pipe_wait(pipe, timestamp);
+
+       if (op & DRM_FREEDRENO_PREP_NOSYNC) {
+               uint32_t current;
+               int ret;
+
+               ret = kgsl_pipe_timestamp(to_kgsl_pipe(pipe), &current);
+               if (ret)
+                       return ret;
+
+               if (timestamp > current)
+                       return -EBUSY;
+
+               return 0;
        }
+
+       if (timestamp)
+               fd_pipe_wait(pipe, timestamp);
+
        return 0;
 }