static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
+static void bo_del(struct fd_bo *bo);
+
/* set buffer name, and add to table, call w/ table_lock held: */
static void set_name(struct fd_bo *bo, uint32_t name)
{
bo->size = size;
bo->handle = handle;
atomic_set(&bo->refcnt, 1);
+ list_inithead(&bo->list);
/* add ourself into the handle table: */
drmHashInsert(dev->handle_table, handle, bo);
return bo;
}

+/* Frees older cached buffers.  Passing time==0 (device teardown)
+ * flushes the entire cache.  Called with table_lock held:
+ */
+void fd_cleanup_bo_cache(struct fd_device *dev, time_t time)
+{
+ int i;
+
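+ /* nothing new can have expired since the last pass at this timestamp: */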
+ if (dev->time == time)
+ return;
+
+ for (i = 0; i < dev->num_buckets; i++) {
+ struct fd_bo_bucket *bucket = &dev->cache_bucket[i];
+ struct fd_bo *bo;
+
+ while (!LIST_IS_EMPTY(&bucket->list)) {
+ bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list);
+
+ /* keep things in cache for at least 1 second: */
+ if (time && ((time - bo->free_time) <= 1))
+ break;
+
+ list_del(&bo->list);
+ bo_del(bo);
+ }
+ }
+
+ dev->time = time;
+}
+
+static struct fd_bo_bucket * get_bucket(struct fd_device *dev, uint32_t size)
+{
+ int i;
+
+ /* hmm, this is what intel does, but I suppose we could calculate our
+ * way to the correct bucket size rather than looping..
+ */
+ for (i = 0; i < dev->num_buckets; i++) {
+ struct fd_bo_bucket *bucket = &dev->cache_bucket[i];
+ if (bucket->size >= size) {
+ return bucket;
+ }
+ }
+
+ return NULL;
+}
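+
+/* For reference, a rough sketch of the "calculate our way to the bucket"
+ * idea mentioned above (untested, hypothetical helper; assumes the layout
+ * set up in init_cache_buckets() and a page-aligned size no larger than
+ * the biggest bucket):
+ *
+ *    static uint32_t bucket_size_for(uint32_t size)
+ *    {
+ *        if (size <= 3 * 4096)
+ *            return size;
+ *        // round up to the next quarter-step of the containing power of two:
+ *        return ALIGN(size, 1 << ((31 - __builtin_clz(size)) - 2));
+ *    }
+ */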
+
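+/* a bo is considered idle when a non-blocking (NOSYNC) cpu_prep for both
+ * read and write succeeds, i.e. nothing would have had to wait on the GPU:
+ */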
+static int is_idle(struct fd_bo *bo)
+{
+ return fd_bo_cpu_prep(bo, NULL,
+ DRM_FREEDRENO_PREP_READ |
+ DRM_FREEDRENO_PREP_WRITE |
+ DRM_FREEDRENO_PREP_NOSYNC) == 0;
+}
+
+static struct fd_bo *find_in_bucket(struct fd_device *dev,
+ struct fd_bo_bucket *bucket, uint32_t flags)
+{
+ struct fd_bo *bo = NULL;
+
+ /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could
+ * skip the busy check.. if it is only going to be a render target
+ * then we probably don't need to stall..
+ *
+ * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail
+ * (MRU, since likely to be in GPU cache), rather than head (LRU)..
+ */
+ pthread_mutex_lock(&table_lock);
+ while (!LIST_IS_EMPTY(&bucket->list)) {
+ bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list);
+ if (0 /* TODO: if madvise tells us bo is gone... */) {
+ list_del(&bo->list);
+ bo_del(bo);
+ bo = NULL;
+ continue;
+ }
+ /* TODO check for compatible flags? */
+ if (is_idle(bo)) {
+ list_del(&bo->list);
+ break;
+ }
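+ /* freed bo's are appended at the tail, so the head is the oldest
+ * entry; if even that one is still busy the newer ones almost
+ * certainly are too, so don't bother scanning further:
+ */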
+ bo = NULL;
+ break;
+ }
+ pthread_mutex_unlock(&table_lock);
+
+ return bo;
+}
+
struct fd_bo * fd_bo_new(struct fd_device *dev,
uint32_t size, uint32_t flags)
{
struct fd_bo *bo = NULL;
+ struct fd_bo_bucket *bucket;
uint32_t handle;
int ret;
- ret = dev->funcs->bo_new_handle(dev, ALIGN(size, 4096), flags, &handle);
+ size = ALIGN(size, 4096);
+ bucket = get_bucket(dev, size);
+
+ /* see if we can be green and recycle: */
+ if (bucket) {
+ size = bucket->size;
+ bo = find_in_bucket(dev, bucket, flags);
+ if (bo) {
+ atomic_set(&bo->refcnt, 1);
+ fd_device_ref(bo->dev);
+ return bo;
+ }
+ }
+
+ ret = dev->funcs->bo_new_handle(dev, size, flags, &handle);
if (ret)
return NULL;
pthread_mutex_lock(&table_lock);
bo = bo_from_handle(dev, size, handle);
+ bo->bo_reuse = 1;
pthread_mutex_unlock(&table_lock);
return bo;
}
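
+/* example of the intended effect (sketch, arbitrary sizes): a freed bo
+ * can satisfy a later size-compatible allocation without another trip
+ * into the kernel:
+ *
+ *    bo = fd_bo_new(dev, 8000, flags);    // rounded up to the 8192 bucket
+ *    fd_bo_del(bo);                       // parked in the bucket's free list
+ *    bo = fd_bo_new(dev, 8192, flags);    // likely recycled from the cache
+ */
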
void fd_bo_del(struct fd_bo *bo)
{
- struct fd_device *dev;
+ struct fd_device *dev = bo->dev;
if (!atomic_dec_and_test(&bo->refcnt))
return;
+ pthread_mutex_lock(&table_lock);
+
+ if (bo->bo_reuse) {
+ struct fd_bo_bucket *bucket = get_bucket(dev, bo->size);
+
+ /* see if we can be green and recycle: */
+ if (bucket) {
+ struct timespec time;
+
+ clock_gettime(CLOCK_MONOTONIC, &time);
+
+ bo->free_time = time.tv_sec;
+ list_addtail(&bo->list, &bucket->list);
+ fd_cleanup_bo_cache(dev, time.tv_sec);
+
+ /* bo's in the bucket cache don't have a ref and
+ * don't hold a ref to the dev:
+ */
+
+ goto out;
+ }
+ }
+
+ bo_del(bo);
+out:
+ fd_device_del_locked(dev);
+ pthread_mutex_unlock(&table_lock);
+}
+
+/* Called under table_lock */
+static void bo_del(struct fd_bo *bo)
+{
if (bo->map)
munmap(bo->map, bo->size);
+ /* TODO probably bo's in bucket list get removed from
+ * handle table??
+ */
+
if (bo->handle) {
struct drm_gem_close req = {
.handle = bo->handle,
};
- pthread_mutex_lock(&table_lock);
drmHashDelete(bo->dev->handle_table, bo->handle);
if (bo->name)
drmHashDelete(bo->dev->name_table, bo->name);
drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
- pthread_mutex_unlock(&table_lock);
}
- dev = bo->dev;
bo->funcs->destroy(bo);
-
- fd_device_del(dev);
}
int fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
struct fd_device * kgsl_device_new(int fd);
struct fd_device * msm_device_new(int fd);
+static void
+add_bucket(struct fd_device *dev, int size)
+{
+ unsigned int i = dev->num_buckets;
+
+ assert(i < ARRAY_SIZE(dev->cache_bucket));
+
+ list_inithead(&dev->cache_bucket[i].list);
+ dev->cache_bucket[i].size = size;
+ dev->num_buckets++;
+}
+
+static void
+init_cache_buckets(struct fd_device *dev)
+{
+ unsigned long size, cache_max_size = 64 * 1024 * 1024;
+
+ /* OK, so power of two buckets was too wasteful of memory.
+ * Give 3 other sizes between each power of two, to hopefully
+ * cover things accurately enough. (The alternative is
+ * probably to just go for exact matching of sizes, and assume
+ * that for things like composited window resize the tiled
+ * width/height alignment and rounding of sizes to pages will
+ * get us useful cache hit rates anyway)
+ */
+ add_bucket(dev, 4096);
+ add_bucket(dev, 4096 * 2);
+ add_bucket(dev, 4096 * 3);
+
+ /* Initialize the linked lists for BO reuse cache. */
+ for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
+ add_bucket(dev, size);
+ add_bucket(dev, size + size * 1 / 4);
+ add_bucket(dev, size + size * 2 / 4);
+ add_bucket(dev, size + size * 3 / 4);
+ }
+}
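+
+/* with the layout above the bucket sizes run 4k, 8k, 12k, 16k, 20k, 24k,
+ * 28k, 32k, 40k, 48k, 56k, 64k, 80k, ... i.e. four steps per power of
+ * two, for 55 buckets in total (3 small ones plus 4 per doubling from
+ * 16k up to and including 64MB).
+ */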
+
static struct fd_device * fd_device_new_impl(int fd)
{
struct fd_device *dev;
dev->fd = fd;
dev->handle_table = drmHashCreate();
dev->name_table = drmHashCreate();
+ init_cache_buckets(dev);
return dev;
}
return dev;
}
+static void fd_device_del_impl(struct fd_device *dev)
+{
+ fd_cleanup_bo_cache(dev, 0);
+ drmHashDestroy(dev->handle_table);
+ drmHashDestroy(dev->name_table);
+ drmHashDelete(dev_table, dev->fd);
+ dev->funcs->destroy(dev);
+}
+
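+/* like fd_device_del(), but for callers that already hold table_lock,
+ * in particular the final fd_bo_del() on a device:
+ */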
+void fd_device_del_locked(struct fd_device *dev)
+{
+ if (!atomic_dec_and_test(&dev->refcnt))
+ return;
+ fd_device_del_impl(dev);
+}
+
void fd_device_del(struct fd_device *dev)
{
if (!atomic_dec_and_test(&dev->refcnt))
return;
pthread_mutex_lock(&table_lock);
- drmHashDestroy(dev->handle_table);
- drmHashDestroy(dev->name_table);
- drmHashDelete(dev_table, dev->fd);
+ fd_device_del_impl(dev);
pthread_mutex_unlock(&table_lock);
- dev->funcs->destroy(dev);
}