fprintf(stderr, __VA_ARGS__); \
} while (0)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
struct drm_intel_gem_bo_bucket {
unsigned long size;
};
-/* Only cache objects up to 64MB. Bigger than that, and the rounding of the
- * size makes many operations fail that wouldn't otherwise.
- */
-#define DRM_INTEL_GEM_BO_BUCKETS 14
typedef struct _drm_intel_bufmgr_gem {
drm_intel_bufmgr bufmgr;
int exec_count;
-	/** Array of lists of cached gem objects of power-of-two sizes */
-	struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
+	/** Array of lists of cached gem objects, one list per bucket size */
+	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
+	int num_buckets;
uint64_t gtt_size;
int available_fences;
*/
uint32_t tiling_mode;
uint32_t swizzle_mode;
+ unsigned long stride;
time_t free_time;
uint32_t * swizzle_mode);
static int
-drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
- uint32_t stride);
+drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
+ uint32_t tiling_mode,
+ uint32_t stride);
static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
time_t time);
{
int i;
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
+ for (i = 0; i < bufmgr_gem->num_buckets; i++) {
struct drm_intel_gem_bo_bucket *bucket =
&bufmgr_gem->cache_bucket[i];
if (bucket->size >= size) {
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
const char *name,
unsigned long size,
- unsigned long flags)
+ unsigned long flags,
+ uint32_t tiling_mode,
+ unsigned long stride)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
drm_intel_bo_gem *bo_gem;
bucket);
goto retry;
}
+
+ if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
+ tiling_mode,
+ stride)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ goto retry;
+ }
}
}
pthread_mutex_unlock(&bufmgr_gem->lock);
return NULL;
}
bo_gem->bo.bufmgr = bufmgr;
+
+ bo_gem->tiling_mode = I915_TILING_NONE;
+ bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+ bo_gem->stride = 0;
+
+ if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
+ tiling_mode,
+ stride)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ return NULL;
+ }
}
bo_gem->name = name;
bo_gem->reloc_tree_fences = 0;
bo_gem->used_as_reloc_target = 0;
bo_gem->has_error = 0;
- bo_gem->tiling_mode = I915_TILING_NONE;
- bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
bo_gem->reusable = 1;
drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
unsigned int alignment)
{
return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
- BO_ALLOC_FOR_RENDER);
+ BO_ALLOC_FOR_RENDER,
+ I915_TILING_NONE, 0);
}
static drm_intel_bo *
unsigned long size,
unsigned int alignment)
{
- return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
+ return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
+ I915_TILING_NONE, 0);
}
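
With tiling folded into the allocator, a caller that wants a pre-tiled buffer can go through the same internal entry point. A minimal sketch, assuming the patched drm_intel_gem_bo_alloc_internal() signature above; the wrapper name and the idea of exposing it are illustrative, not part of the patch:

static drm_intel_bo *
drm_intel_gem_bo_alloc_x_tiled(drm_intel_bufmgr *bufmgr, const char *name,
			       unsigned long size, unsigned long stride)
{
	/* Illustrative only: forward an X-tiling request to the extended
	 * internal allocator introduced by this patch.
	 */
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
					       I915_TILING_X, stride);
}
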
static drm_intel_bo *
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
drm_intel_bo *bo;
- unsigned long size, stride, aligned_y = y;
- int ret;
+ unsigned long size, stride;
+ uint32_t tiling;
- /* If we're tiled, our allocations are in 8 or 32-row blocks,
- * so failure to align our height means that we won't allocate
- * enough pages.
- *
- * If we're untiled, we still have to align to 2 rows high
- * because the data port accesses 2x2 blocks even if the
- * bottom row isn't to be rendered, so failure to align means
- * we could walk off the end of the GTT and fault. This is
- * documented on 965, and may be the case on older chipsets
- * too so we try to be careful.
- */
- if (*tiling_mode == I915_TILING_NONE)
- aligned_y = ALIGN(y, 2);
- else if (*tiling_mode == I915_TILING_X)
- aligned_y = ALIGN(y, 8);
- else if (*tiling_mode == I915_TILING_Y)
- aligned_y = ALIGN(y, 32);
-
- stride = x * cpp;
- stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode);
- size = stride * aligned_y;
- size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
-
- bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
+ do {
+ unsigned long aligned_y;
+
+ tiling = *tiling_mode;
+
+ /* If we're tiled, our allocations are in 8 or 32-row blocks,
+ * so failure to align our height means that we won't allocate
+ * enough pages.
+ *
+ * If we're untiled, we still have to align to 2 rows high
+ * because the data port accesses 2x2 blocks even if the
+ * bottom row isn't to be rendered, so failure to align means
+ * we could walk off the end of the GTT and fault. This is
+ * documented on 965, and may be the case on older chipsets
+ * too so we try to be careful.
+ */
+ aligned_y = y;
+ if (tiling == I915_TILING_NONE)
+ aligned_y = ALIGN(y, 2);
+ else if (tiling == I915_TILING_X)
+ aligned_y = ALIGN(y, 8);
+ else if (tiling == I915_TILING_Y)
+ aligned_y = ALIGN(y, 32);
+
+ stride = x * cpp;
+ stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling);
+ size = stride * aligned_y;
+ size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
+ } while (*tiling_mode != tiling);
+
+ bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
+ *tiling_mode, stride);
if (!bo)
return NULL;
- ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
- if (ret != 0) {
- drm_intel_gem_bo_unreference(bo);
- return NULL;
- }
-
*pitch = stride;
-
return bo;
}
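
The do/while above recomputes the pitch and size until drm_intel_gem_bo_tile_size() stops adjusting *tiling_mode, so the tiling handed to the allocator always matches the size computed for it. Caller-side usage is unchanged; a minimal sketch through the exported drm_intel_bo_alloc_tiled() (dimensions and buffer name are illustrative, error handling trimmed):

	uint32_t tiling = I915_TILING_X;
	unsigned long pitch;
	drm_intel_bo *bo;

	/* Request an X-tiled 1024x768, 32bpp buffer; the bufmgr may fall
	 * back to a different tiling mode and reports the pitch it chose.
	 */
	bo = drm_intel_bo_alloc_tiled(bufmgr, "front buffer", 1024, 768, 4,
				      &tiling, &pitch, 0);
	if (bo == NULL) {
		/* handle allocation failure */
	}
	/* With this patch the tiling is already applied at creation time,
	 * so no separate drm_intel_bo_set_tiling() call is needed here.
	 */
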
}
bo_gem->tiling_mode = get_tiling.tiling_mode;
bo_gem->swizzle_mode = get_tiling.swizzle_mode;
+ /* XXX stride is unknown */
drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
{
int i;
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
+ for (i = 0; i < bufmgr_gem->num_buckets; i++) {
struct drm_intel_gem_bo_bucket *bucket =
&bufmgr_gem->cache_bucket[i];
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
struct drm_intel_gem_bo_bucket *bucket;
- uint32_t tiling_mode;
int i;
/* Unreference all the target buffers */
for (i = 0; i < bo_gem->reloc_count; i++) {
- drm_intel_gem_bo_unreference_locked_timed(bo_gem->
- reloc_target_info[i].bo,
- time);
+ if (bo_gem->reloc_target_info[i].bo != bo) {
+ drm_intel_gem_bo_unreference_locked_timed(bo_gem->
+ reloc_target_info[i].bo,
+ time);
+ }
}
bo_gem->reloc_count = 0;
bo_gem->used_as_reloc_target = 0;
bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
/* Put the buffer into our internal cache for reuse if we can. */
- tiling_mode = I915_TILING_NONE;
if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
- drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
I915_MADV_DONTNEED)) {
bo_gem->free_time = time;
pthread_mutex_destroy(&bufmgr_gem->lock);
/* Free any cached buffer objects we were going to reuse */
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
+ for (i = 0; i < bufmgr_gem->num_buckets; i++) {
struct drm_intel_gem_bo_bucket *bucket =
&bufmgr_gem->cache_bucket[i];
drm_intel_bo_gem *bo_gem;
bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
- drm_intel_gem_bo_reference(target_bo);
+ if (target_bo != bo)
+ drm_intel_gem_bo_reference(target_bo);
if (need_fence)
bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
DRM_INTEL_RELOC_FENCE;
}
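
The target_bo != bo guard above (and the matching check in the unreference path) handles self-referencing relocations without taking an extra reference that would keep the buffer alive forever. For example, through the exported relocation API (offset and domains below are illustrative):

	/* A BO patching a pointer back into itself: with the guard above it
	 * no longer holds a reference on itself, so the final unreference
	 * can still free it.
	 */
	drm_intel_bo_emit_reloc(bo, 4, bo, 0,
				I915_GEM_DOMAIN_INSTRUCTION, 0);
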
static int
-drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
- uint32_t stride)
+drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
+ uint32_t tiling_mode,
+ uint32_t stride)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
struct drm_i915_gem_set_tiling set_tiling;
int ret;
- if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
+ if (tiling_mode == bo_gem->tiling_mode &&
+ stride == bo_gem->stride)
return 0;
memset(&set_tiling, 0, sizeof(set_tiling));
- set_tiling.handle = bo_gem->gem_handle;
-
do {
- set_tiling.tiling_mode = *tiling_mode;
+ set_tiling.handle = bo_gem->gem_handle;
+ set_tiling.tiling_mode = tiling_mode;
set_tiling.stride = stride;
ret = ioctl(bufmgr_gem->fd,
DRM_IOCTL_I915_GEM_SET_TILING,
&set_tiling);
} while (ret == -1 && errno == EINTR);
- if (ret == 0) {
- bo_gem->tiling_mode = set_tiling.tiling_mode;
- bo_gem->swizzle_mode = set_tiling.swizzle_mode;
+ if (ret == -1)
+ return -errno;
+
+ bo_gem->tiling_mode = set_tiling.tiling_mode;
+ bo_gem->swizzle_mode = set_tiling.swizzle_mode;
+ bo_gem->stride = stride;
+ return 0;
+}
+
+static int
+drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ uint32_t stride)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+ int ret;
+
+ if (bo_gem->global_name == 0)
+ return 0;
+
+ ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
+ if (ret == 0)
drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
- } else
- ret = -errno;
*tiling_mode = bo_gem->tiling_mode;
	return ret;
}
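
The split leaves drm_intel_gem_bo_set_tiling_internal() as the unconditional ioctl path used at creation and on cache reuse, while the public entry point only re-tiles buffers that have been flinked (global_name != 0); everything else now has its tiling fixed at allocation. A rough caller-side sketch of the flinked case (the stride value is illustrative):

	uint32_t name, tiling = I915_TILING_Y;

	drm_intel_bo_flink(bo, &name);		/* assigns global_name */
	/* Only now does the public API reach the SET_TILING ioctl again. */
	drm_intel_bo_set_tiling(bo, &tiling, 4096);
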
for (i = 0; i < bo_gem->reloc_count; i++) {
if (bo_gem->reloc_target_info[i].bo == target_bo)
return 1;
+ if (bo == bo_gem->reloc_target_info[i].bo)
+ continue;
if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
target_bo))
return 1;
return 0;
}
+static void
+add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
+{
+ unsigned int i = bufmgr_gem->num_buckets;
+
+ assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
+
+ DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
+ bufmgr_gem->cache_bucket[i].size = size;
+ bufmgr_gem->num_buckets++;
+}
+
+static void
+init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
+{
+	/* Only cache objects up to 64MB.  Bigger than that, and the rounding
+	 * of the size makes many operations fail that wouldn't otherwise.
+	 */
+	unsigned long size, cache_max_size = 64 * 1024 * 1024;
+
+ /* OK, so power of two buckets was too wasteful of memory.
+ * Give 3 other sizes between each power of two, to hopefully
+ * cover things accurately enough. (The alternative is
+ * probably to just go for exact matching of sizes, and assume
+ * that for things like composited window resize the tiled
+ * width/height alignment and rounding of sizes to pages will
+ * get us useful cache hit rates anyway)
+ */
+ add_bucket(bufmgr_gem, 4096);
+ add_bucket(bufmgr_gem, 4096 * 2);
+ add_bucket(bufmgr_gem, 4096 * 3);
+
+	/* Add the remaining buckets: each power-of-two size plus three
+	 * intermediate sizes in between, up to cache_max_size.
+	 */
+ for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
+ add_bucket(bufmgr_gem, size);
+
+ add_bucket(bufmgr_gem, size + size * 1 / 4);
+ add_bucket(bufmgr_gem, size + size * 2 / 4);
+ add_bucket(bufmgr_gem, size + size * 3 / 4);
+ }
+}
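
A stand-alone sketch (not part of the patch) of the resulting bucket ladder, confirming it fits the cache_bucket[14 * 4] bound: 3 sub-16KB buckets plus 13 power-of-two steps of 4 buckets each, i.e. 55 <= 56:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;
	int count = 3;	/* the 4KB, 8KB and 12KB buckets added first */

	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		printf("%lu %lu %lu %lu\n", size,
		       size + size * 1 / 4,
		       size + size * 2 / 4,
		       size + size * 3 / 4);
		count += 4;
	}

	assert(count <= 14 * 4);	/* matches the array bound above */
	return 0;
}
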
+
/**
* Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
drm_intel_bufmgr_gem *bufmgr_gem;
struct drm_i915_gem_get_aperture aperture;
drm_i915_getparam_t gp;
- int ret, i;
- unsigned long size;
+ int ret;
int exec2 = 0, has_bsd = 0;
bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
drm_intel_gem_get_pipe_from_crtc_id;
bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
- /* Initialize the linked lists for BO reuse cache. */
- for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
- DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
- bufmgr_gem->cache_bucket[i].size = size;
- }
+ init_cache_buckets(bufmgr_gem);
return &bufmgr_gem->bufmgr;
}
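
For completeness, a minimal caller sketch of bringing up the patched bufmgr with reuse enabled (device path and batch size are illustrative):

#include <fcntl.h>
#include "intel_bufmgr.h"

static drm_intel_bufmgr *
example_bufmgr_init(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* illustrative path */
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);

	/* Opt in to the bucketed BO cache set up by init_cache_buckets(). */
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	return bufmgr;
}
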