X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=intel%2Fintel_bufmgr_gem.c;h=b4be1afe1a73ae3c607b4ffea9d34b1aa2483c03;hb=1db22ff741f92b84450ec13093e070a6ad5fc857;hp=cf3943c2bdd3eae7f90ae300d10e17dd5bdffcf2;hpb=9c8ad05e8bb1c954b804e40f2f975fed23c24550;p=profile%2Fivi%2Flibdrm.git

(Editor's note: standalone sketches illustrating the main changes follow the patch text. The angle-bracketed header names in the first hunk were eaten by HTML stripping and have been restored from the libdrm source.)

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index cf3943c..b4be1af 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -39,6 +39,7 @@
 #endif
 
 #include <xf86drm.h>
+#include <xf86atomic.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -53,7 +54,6 @@
 
 #include "errno.h"
 #include "libdrm_lists.h"
-#include "intel_atomic.h"
 #include "intel_bufmgr.h"
 #include "intel_bufmgr_priv.h"
 #include "intel_chipset.h"
@@ -66,6 +66,8 @@
 	fprintf(stderr, __VA_ARGS__);		\
 } while (0)
 
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
 struct drm_intel_gem_bo_bucket {
@@ -73,10 +75,6 @@ struct drm_intel_gem_bo_bucket {
 	unsigned long size;
 };
 
-/* Only cache objects up to 64MB.  Bigger than that, and the rounding of the
- * size makes many operations fail that wouldn't otherwise.
- */
-#define DRM_INTEL_GEM_BO_BUCKETS	14
 typedef struct _drm_intel_bufmgr_gem {
 	drm_intel_bufmgr bufmgr;
 
@@ -87,19 +85,30 @@ typedef struct _drm_intel_bufmgr_gem {
 	pthread_mutex_t lock;
 
 	struct drm_i915_gem_exec_object *exec_objects;
+	struct drm_i915_gem_exec_object2 *exec2_objects;
 	drm_intel_bo **exec_bos;
 	int exec_size;
 	int exec_count;
 
 	/** Array of lists of cached gem objects of power-of-two sizes */
-	struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
+	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
+	int num_buckets;
 
 	uint64_t gtt_size;
 	int available_fences;
 	int pci_device;
+	int gen;
 	char bo_reuse;
+	char fenced_relocs;
 } drm_intel_bufmgr_gem;
 
+#define DRM_INTEL_RELOC_FENCE (1<<0)
+
+typedef struct _drm_intel_reloc_target_info {
+	drm_intel_bo *bo;
+	int flags;
+} drm_intel_reloc_target;
+
 struct _drm_intel_bo_gem {
 	drm_intel_bo bo;
 
@@ -123,13 +132,16 @@ struct _drm_intel_bo_gem {
 	 */
 	uint32_t tiling_mode;
 	uint32_t swizzle_mode;
+	unsigned long stride;
 
 	time_t free_time;
 
 	/** Array passed to the DRM containing relocation information. */
 	struct drm_i915_gem_relocation_entry *relocs;
-	/** Array of bos corresponding to relocs[i].target_handle */
-	drm_intel_bo **reloc_target_bo;
+	/**
+	 * Array of info structs corresponding to relocs[i].target_handle etc
+	 */
+	drm_intel_reloc_target *reloc_target_info;
 	/** Number of entries in relocs */
 	int reloc_count;
 	/** Mapped address for the buffer, saved across map/unmap cycles */
@@ -154,6 +166,11 @@ struct _drm_intel_bo_gem {
 	char used_as_reloc_target;
 
 	/**
+	 * Boolean of whether we have encountered an error whilst building the relocation tree.
+ */ + char has_error; + + /** * Boolean of whether this buffer can be re-used */ char reusable; @@ -184,8 +201,9 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, uint32_t * swizzle_mode); static int -drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, - uint32_t stride); +drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, + uint32_t tiling_mode, + uint32_t stride); static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, time_t time); @@ -205,11 +223,11 @@ drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, return size; /* 965+ just need multiples of page size for tiling */ - if (IS_I965G(bufmgr_gem)) + if (bufmgr_gem->gen >= 4) return ROUND_UP_TO(size, 4096); /* Older chips need powers of two, of at least 512k or 1M */ - if (IS_I9XX(bufmgr_gem)) { + if (bufmgr_gem->gen == 3) { min_size = 1024*1024; max_size = 128*1024*1024; } else { @@ -237,14 +255,22 @@ static unsigned long drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long pitch, uint32_t tiling_mode) { - unsigned long tile_width = 512; + unsigned long tile_width; unsigned long i; + /* If untiled, then just align it so that we can do rendering + * to it with the 3D engine. + */ if (tiling_mode == I915_TILING_NONE) - return ROUND_UP_TO(pitch, tile_width); + return ALIGN(pitch, 64); + + if (tiling_mode == I915_TILING_X) + tile_width = 512; + else + tile_width = 128; /* 965 is flexible */ - if (IS_I965G(bufmgr_gem)) + if (bufmgr_gem->gen >= 4) return ROUND_UP_TO(pitch, tile_width); /* Pre-965 needs power of two tile width */ @@ -260,7 +286,7 @@ drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, { int i; - for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) { + for (i = 0; i < bufmgr_gem->num_buckets; i++) { struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; if (bucket->size >= size) { @@ -287,7 +313,7 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) } for (j = 0; j < bo_gem->reloc_count; j++) { - drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j]; + drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *) target_bo; @@ -304,7 +330,7 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) } } -static void +static inline void drm_intel_gem_bo_reference(drm_intel_bo *bo) { drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; @@ -359,6 +385,55 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo) bufmgr_gem->exec_count++; } +static void +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + int index; + + if (bo_gem->validate_index != -1) { + if (need_fence) + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= + EXEC_OBJECT_NEEDS_FENCE; + return; + } + + /* Extend the array of validation entries as necessary. 
*/ + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { + int new_size = bufmgr_gem->exec_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_gem->exec2_objects = + realloc(bufmgr_gem->exec2_objects, + sizeof(*bufmgr_gem->exec2_objects) * new_size); + bufmgr_gem->exec_bos = + realloc(bufmgr_gem->exec_bos, + sizeof(*bufmgr_gem->exec_bos) * new_size); + bufmgr_gem->exec_size = new_size; + } + + index = bufmgr_gem->exec_count; + bo_gem->validate_index = index; + /* Fill in array entry */ + bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; + bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->exec2_objects[index].alignment = 0; + bufmgr_gem->exec2_objects[index].offset = 0; + bufmgr_gem->exec_bos[index] = bo; + bufmgr_gem->exec2_objects[index].flags = 0; + bufmgr_gem->exec2_objects[index].rsvd1 = 0; + bufmgr_gem->exec2_objects[index].rsvd2 = 0; + if (need_fence) { + bufmgr_gem->exec2_objects[index].flags |= + EXEC_OBJECT_NEEDS_FENCE; + } + bufmgr_gem->exec_count++; +} + #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ sizeof(uint32_t)) @@ -377,7 +452,7 @@ drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, * aperture. Optimal packing is for wimps. */ size = bo_gem->bo.size; - if (!IS_I965G(bufmgr_gem) && bo_gem->tiling_mode != I915_TILING_NONE) + if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) size *= 2; bo_gem->reloc_tree_size = size; @@ -395,7 +470,19 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo) bo_gem->relocs = malloc(max_relocs * sizeof(struct drm_i915_gem_relocation_entry)); - bo_gem->reloc_target_bo = malloc(max_relocs * sizeof(drm_intel_bo *)); + bo_gem->reloc_target_info = malloc(max_relocs * + sizeof(drm_intel_reloc_target)); + if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { + bo_gem->has_error = 1; + + free (bo_gem->relocs); + bo_gem->relocs = NULL; + + free (bo_gem->reloc_target_info); + bo_gem->reloc_target_info = NULL; + + return 1; + } return 0; } @@ -464,7 +551,9 @@ static drm_intel_bo * drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name, unsigned long size, - unsigned long flags) + unsigned long flags, + uint32_t tiling_mode, + unsigned long stride) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; drm_intel_bo_gem *bo_gem; @@ -530,6 +619,13 @@ retry: bucket); goto retry; } + + if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, + tiling_mode, + stride)) { + drm_intel_gem_bo_free(&bo_gem->bo); + goto retry; + } } } pthread_mutex_unlock(&bufmgr_gem->lock); @@ -557,6 +653,17 @@ retry: return NULL; } bo_gem->bo.bufmgr = bufmgr; + + bo_gem->tiling_mode = I915_TILING_NONE; + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->stride = 0; + + if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, + tiling_mode, + stride)) { + drm_intel_gem_bo_free(&bo_gem->bo); + return NULL; + } } bo_gem->name = name; @@ -564,8 +671,7 @@ retry: bo_gem->validate_index = -1; bo_gem->reloc_tree_fences = 0; bo_gem->used_as_reloc_target = 0; - bo_gem->tiling_mode = I915_TILING_NONE; - bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->has_error = 0; bo_gem->reusable = 1; drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); @@ -583,7 +689,8 @@ drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, unsigned int alignment) { return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, - BO_ALLOC_FOR_RENDER); + 
BO_ALLOC_FOR_RENDER, + I915_TILING_NONE, 0); } static drm_intel_bo * @@ -592,7 +699,8 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, unsigned long size, unsigned int alignment) { - return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0); + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, + I915_TILING_NONE, 0); } static drm_intel_bo * @@ -602,33 +710,45 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; drm_intel_bo *bo; - unsigned long size, stride, aligned_y = y; - int ret; + unsigned long size, stride; + uint32_t tiling; - if (*tiling_mode == I915_TILING_NONE) - aligned_y = ALIGN(y, 2); - else if (*tiling_mode == I915_TILING_X) - aligned_y = ALIGN(y, 8); - else if (*tiling_mode == I915_TILING_Y) - aligned_y = ALIGN(y, 32); - - stride = x * cpp; - stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode); - size = stride * aligned_y; - size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); - - bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags); + do { + unsigned long aligned_y; + + tiling = *tiling_mode; + + /* If we're tiled, our allocations are in 8 or 32-row blocks, + * so failure to align our height means that we won't allocate + * enough pages. + * + * If we're untiled, we still have to align to 2 rows high + * because the data port accesses 2x2 blocks even if the + * bottom row isn't to be rendered, so failure to align means + * we could walk off the end of the GTT and fault. This is + * documented on 965, and may be the case on older chipsets + * too so we try to be careful. + */ + aligned_y = y; + if (tiling == I915_TILING_NONE) + aligned_y = ALIGN(y, 2); + else if (tiling == I915_TILING_X) + aligned_y = ALIGN(y, 8); + else if (tiling == I915_TILING_Y) + aligned_y = ALIGN(y, 32); + + stride = x * cpp; + stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling); + size = stride * aligned_y; + size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); + } while (*tiling_mode != tiling); + + bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, + *tiling_mode, stride); if (!bo) return NULL; - ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride); - if (ret != 0) { - drm_intel_gem_bo_unreference(bo); - return NULL; - } - *pitch = stride; - return bo; } @@ -686,10 +806,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, } bo_gem->tiling_mode = get_tiling.tiling_mode; bo_gem->swizzle_mode = get_tiling.swizzle_mode; - if (bo_gem->tiling_mode == I915_TILING_NONE) - bo_gem->reloc_tree_fences = 0; - else - bo_gem->reloc_tree_fences = 1; + /* XXX stride is unknown */ drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); @@ -710,9 +827,6 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) if (bo_gem->gtt_virtual) munmap(bo_gem->gtt_virtual, bo_gem->bo.size); - free(bo_gem->reloc_target_bo); - free(bo_gem->relocs); - /* Close this object */ memset(&close, 0, sizeof(close)); close.handle = bo_gem->gem_handle; @@ -731,7 +845,7 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) { int i; - for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) { + for (i = 0; i < bufmgr_gem->num_buckets; i++) { struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; @@ -756,14 +870,15 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; struct drm_intel_gem_bo_bucket *bucket; - uint32_t tiling_mode; int i; /* Unreference all the target buffers */ for (i = 0; i < bo_gem->reloc_count; i++) { - drm_intel_gem_bo_unreference_locked_timed(bo_gem-> - reloc_target_bo[i], - time); + if (bo_gem->reloc_target_info[i].bo != bo) { + drm_intel_gem_bo_unreference_locked_timed(bo_gem-> + reloc_target_info[i].bo, + time); + } } bo_gem->reloc_count = 0; bo_gem->used_as_reloc_target = 0; @@ -771,11 +886,19 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) DBG("bo_unreference final: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); + /* release memory associated with this object */ + if (bo_gem->reloc_target_info) { + free(bo_gem->reloc_target_info); + bo_gem->reloc_target_info = NULL; + } + if (bo_gem->relocs) { + free(bo_gem->relocs); + bo_gem->relocs = NULL; + } + bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); /* Put the buffer into our internal cache for reuse if we can. */ - tiling_mode = I915_TILING_NONE; if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && - drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 && drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) { bo_gem->free_time = time; @@ -846,6 +969,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) &mmap_arg); } while (ret == -1 && errno == EINTR); if (ret != 0) { + ret = -errno; fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, bo_gem->gem_handle, @@ -871,6 +995,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) &set_domain); } while (ret == -1 && errno == EINTR); if (ret != 0) { + ret = -errno; fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n", __FILE__, __LINE__, bo_gem->gem_handle, strerror(errno)); @@ -909,6 +1034,7 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) &mmap_arg); } while (ret == -1 && errno == EINTR); if (ret != 0) { + ret = -errno; fprintf(stderr, "%s:%d: Error preparing buffer map %d (%s): %s .\n", __FILE__, __LINE__, @@ -923,13 +1049,15 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) MAP_SHARED, bufmgr_gem->fd, mmap_arg.offset); if (bo_gem->gtt_virtual == MAP_FAILED) { + bo_gem->gtt_virtual = NULL; + ret = -errno; fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, bo_gem->gem_handle, bo_gem->name, strerror(errno)); pthread_mutex_unlock(&bufmgr_gem->lock); - return errno; + return ret; } } @@ -949,6 +1077,7 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) } while (ret == -1 && errno == EINTR); if (ret != 0) { + ret = -errno; fprintf(stderr, "%s:%d: Error setting domain %d: %s\n", __FILE__, __LINE__, bo_gem->gem_handle, strerror(errno)); @@ -1000,10 +1129,12 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) DRM_IOCTL_I915_GEM_SW_FINISH, &sw_finish); } while (ret == -1 && errno == EINTR); + ret = ret == -1 ? 
-errno : 0; bo->virtual = NULL; pthread_mutex_unlock(&bufmgr_gem->lock); - return 0; + + return ret; } static int @@ -1026,12 +1157,14 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, &pwrite); } while (ret == -1 && errno == EINTR); if (ret != 0) { + ret = -errno; fprintf(stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, (int)size, strerror(errno)); } - return 0; + + return ret; } static int @@ -1077,12 +1210,14 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, &pread); } while (ret == -1 && errno == EINTR); if (ret != 0) { + ret = -errno; fprintf(stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, (int)size, strerror(errno)); } - return 0; + + return ret; } /** Waits for all GPU rendering to the object to have completed. */ @@ -1130,13 +1265,14 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; int i; + free(bufmgr_gem->exec2_objects); free(bufmgr_gem->exec_objects); free(bufmgr_gem->exec_bos); pthread_mutex_destroy(&bufmgr_gem->lock); /* Free any cached buffer objects we were going to reuse */ - for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) { + for (i = 0; i < bufmgr_gem->num_buckets; i++) { struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; drm_intel_bo_gem *bo_gem; @@ -1163,19 +1299,33 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) * last known offset in target_bo. */ static int -drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, - drm_intel_bo *target_bo, uint32_t target_offset, - uint32_t read_domains, uint32_t write_domain) +do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain, + int need_fence) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; - pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = 1; + return -ENOMEM; + } + + if (target_bo_gem->tiling_mode == I915_TILING_NONE) + need_fence = 0; + + /* We never use HW fences for rendering on 965+ */ + if (bufmgr_gem->gen >= 4) + need_fence = 0; /* Create a new relocation list if needed */ - if (bo_gem->relocs == NULL) - drm_intel_setup_reloc_list(bo); + if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) + return -ENOMEM; /* Check overflow */ assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); @@ -1188,11 +1338,18 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, * already been accounted for. */ assert(!bo_gem->used_as_reloc_target); - bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; + if (target_bo_gem != bo_gem) { + target_bo_gem->used_as_reloc_target = 1; + bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; + } + /* An object needing a fence is a tiled buffer, so it won't have + * relocs to other buffers. + */ + if (need_fence) + target_bo_gem->reloc_tree_fences = 1; bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; /* Flag the target to disallow further relocations in it. 
*/ - target_bo_gem->used_as_reloc_target = 1; bo_gem->relocs[bo_gem->reloc_count].offset = offset; bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; @@ -1202,16 +1359,42 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; - bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; - drm_intel_gem_bo_reference(target_bo); + bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; + if (target_bo != bo) + drm_intel_gem_bo_reference(target_bo); + if (need_fence) + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = + DRM_INTEL_RELOC_FENCE; + else + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; bo_gem->reloc_count++; - pthread_mutex_unlock(&bufmgr_gem->lock); - return 0; } +static int +drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, + !bufmgr_gem->fenced_relocs); +} + +static int +drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, 1); +} + /** * Walk the tree of relocations rooted at BO and accumulate the list of * validations to be performed and update the relocation buffers with @@ -1227,7 +1410,10 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) return; for (i = 0; i < bo_gem->reloc_count; i++) { - drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i]; + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; + + if (target_bo == bo) + continue; /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc(target_bo); @@ -1238,6 +1424,34 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) } static void +drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + int i; + + if (bo_gem->relocs == NULL) + return; + + for (i = 0; i < bo_gem->reloc_count; i++) { + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; + int need_fence; + + if (target_bo == bo) + continue; + + /* Continue walking the tree depth-first. 
*/ + drm_intel_gem_bo_process_reloc2(target_bo); + + need_fence = (bo_gem->reloc_target_info[i].flags & + DRM_INTEL_RELOC_FENCE); + + /* Add the target to the validate list */ + drm_intel_add_validate_buffer2(target_bo, need_fence); + } +} + + +static void drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) { int i; @@ -1257,14 +1471,37 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) } } +static void +drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) +{ + int i; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + + /* Update the buffer offset */ + if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { + DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", + bo_gem->gem_handle, bo_gem->name, bo->offset, + (unsigned long long)bufmgr_gem->exec2_objects[i].offset); + bo->offset = bufmgr_gem->exec2_objects[i].offset; + } + } +} + static int drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, drm_clip_rect_t * cliprects, int num_cliprects, int DR4) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; struct drm_i915_gem_execbuffer execbuf; int ret, i; + if (bo_gem->has_error) + return -ENOMEM; + pthread_mutex_lock(&bufmgr_gem->lock); /* Update indices and set up the validate list. */ drm_intel_gem_bo_process_reloc(bo); @@ -1287,19 +1524,22 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf); - } while (ret != 0 && errno == EAGAIN); + } while (ret != 0 && errno == EINTR); - if (ret != 0 && errno == ENOMEM) { - fprintf(stderr, - "Execbuffer fails to pin. " - "Estimate: %u. Actual: %u. Available: %u\n", - drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, - bufmgr_gem-> - exec_count), - drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, - bufmgr_gem-> - exec_count), - (unsigned int)bufmgr_gem->gtt_size); + if (ret != 0) { + ret = -errno; + if (errno == ENOSPC) { + fprintf(stderr, + "Execbuffer fails to pin. " + "Estimate: %u. Actual: %u. Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + (unsigned int)bufmgr_gem->gtt_size); + } } drm_intel_update_buffer_offsets(bufmgr_gem); @@ -1317,7 +1557,87 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, bufmgr_gem->exec_count = 0; pthread_mutex_unlock(&bufmgr_gem->lock); - return 0; + return ret; +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + int ring_flag) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + struct drm_i915_gem_execbuffer2 execbuf; + int ret, i; + + if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD)) + return -EINVAL; + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Update indices and set up the validate list. */ + drm_intel_gem_bo_process_reloc2(bo); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. 
+ */ + drm_intel_add_validate_buffer2(bo, 0); + + execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; + execbuf.buffer_count = bufmgr_gem->exec_count; + execbuf.batch_start_offset = 0; + execbuf.batch_len = used; + execbuf.cliprects_ptr = (uintptr_t)cliprects; + execbuf.num_cliprects = num_cliprects; + execbuf.DR1 = 0; + execbuf.DR4 = DR4; + execbuf.flags = ring_flag; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + + do { + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + } while (ret != 0 && errno == EINTR); + + if (ret != 0) { + ret = -errno; + if (ret == -ENOMEM) { + fprintf(stderr, + "Execbuffer fails to pin. " + "Estimate: %u. Actual: %u. Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem->exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem->exec_count), + (unsigned int) bufmgr_gem->gtt_size); + } + } + drm_intel_update_buffer_offsets2(bufmgr_gem); + + if (bufmgr_gem->bufmgr.debug) + drm_intel_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + bufmgr_gem->exec_bos[i] = NULL; + } + bufmgr_gem->exec_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int +drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4) +{ + return drm_intel_gem_bo_mrb_exec2(bo, used, + cliprects, num_cliprects, DR4, + I915_EXEC_RENDER); } static int @@ -1364,46 +1684,55 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo) } static int -drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, - uint32_t stride) +drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, + uint32_t tiling_mode, + uint32_t stride) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; struct drm_i915_gem_set_tiling set_tiling; int ret; - if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode) + if (tiling_mode == bo_gem->tiling_mode && + stride == bo_gem->stride) return 0; - /* If we're going from non-tiling to tiling, bump fence count */ - if (bo_gem->tiling_mode == I915_TILING_NONE) - bo_gem->reloc_tree_fences++; - memset(&set_tiling, 0, sizeof(set_tiling)); - set_tiling.handle = bo_gem->gem_handle; - set_tiling.tiling_mode = *tiling_mode; - set_tiling.stride = stride; - do { + set_tiling.handle = bo_gem->gem_handle; + set_tiling.tiling_mode = tiling_mode; + set_tiling.stride = stride; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && errno == EINTR); - if (ret != 0) { - *tiling_mode = bo_gem->tiling_mode; + if (ret == -1) return -errno; - } + bo_gem->tiling_mode = set_tiling.tiling_mode; bo_gem->swizzle_mode = set_tiling.swizzle_mode; + bo_gem->stride = stride; + return 0; +} - /* If we're going from tiling to non-tiling, drop fence count */ - if (bo_gem->tiling_mode == I915_TILING_NONE) - bo_gem->reloc_tree_fences--; +static int +drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret; - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + if (bo_gem->global_name == 0) + return 0; + + ret = 
drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); + if (ret == 0) + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); *tiling_mode = bo_gem->tiling_mode; - return 0; + return ret; } static int @@ -1456,6 +1785,22 @@ drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) } /** + * Enable use of fenced reloc type. + * + * New code should enable this to avoid unnecessary fence register + * allocation. If this option is not enabled, all relocs will have fence + * register allocated. + */ +void +drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) + bufmgr_gem->fenced_relocs = 1; +} + +/** * Return the additional aperture space required by the tree of buffer objects * rooted at bo. */ @@ -1475,7 +1820,7 @@ drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) for (i = 0; i < bo_gem->reloc_count; i++) total += drm_intel_gem_bo_get_aperture_space(bo_gem-> - reloc_target_bo[i]); + reloc_target_info[i].bo); return total; } @@ -1522,7 +1867,7 @@ drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) for (i = 0; i < bo_gem->reloc_count; i++) drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> - reloc_target_bo[i]); + reloc_target_info[i].bo); } /** @@ -1606,7 +1951,7 @@ drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) if (bufmgr_gem->available_fences) { total_fences = drm_intel_gem_total_fences(bo_array, count); if (total_fences > bufmgr_gem->available_fences) - return -1; + return -ENOSPC; } total = drm_intel_gem_estimate_batch_space(bo_array, count); @@ -1618,7 +1963,7 @@ drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) DBG("check_space: overflowed available aperture, " "%dkb vs %dkb\n", total / 1024, (int)bufmgr_gem->gtt_size / 1024); - return -1; + return -ENOSPC; } else { DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, (int)bufmgr_gem->gtt_size / 1024); @@ -1640,15 +1985,25 @@ drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) } static int +drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + return bo_gem->reusable; +} + +static int _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) { drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; int i; for (i = 0; i < bo_gem->reloc_count; i++) { - if (bo_gem->reloc_target_bo[i] == target_bo) + if (bo_gem->reloc_target_info[i].bo == target_bo) return 1; - if (_drm_intel_gem_bo_references(bo_gem->reloc_target_bo[i], + if (bo == bo_gem->reloc_target_info[i].bo) + continue; + if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, target_bo)) return 1; } @@ -1669,6 +2024,45 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) return 0; } +static void +add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) +{ + unsigned int i = bufmgr_gem->num_buckets; + + assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); + + DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); + bufmgr_gem->cache_bucket[i].size = size; + bufmgr_gem->num_buckets++; +} + +static void +init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) +{ + unsigned long size, cache_max_size = 64 * 1024 * 1024; + + /* OK, so power of two buckets was too wasteful of memory. + * Give 3 other sizes between each power of two, to hopefully + * cover things accurately enough. 
(The alternative is + * probably to just go for exact matching of sizes, and assume + * that for things like composited window resize the tiled + * width/height alignment and rounding of sizes to pages will + * get us useful cache hit rates anyway) + */ + add_bucket(bufmgr_gem, 4096); + add_bucket(bufmgr_gem, 4096 * 2); + add_bucket(bufmgr_gem, 4096 * 3); + + /* Initialize the linked lists for BO reuse cache. */ + for (size = 4 * 4096; size <= cache_max_size; size *= 2) { + add_bucket(bufmgr_gem, size); + + add_bucket(bufmgr_gem, size + size * 1 / 4); + add_bucket(bufmgr_gem, size + size * 2 / 4); + add_bucket(bufmgr_gem, size + size * 3 / 4); + } +} + /** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -1681,10 +2075,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) drm_intel_bufmgr_gem *bufmgr_gem; struct drm_i915_gem_get_aperture aperture; drm_i915_getparam_t gp; - int ret, i; - unsigned long size; + int ret; + int exec2 = 0, has_bsd = 0; bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); + if (bufmgr_gem == NULL) + return NULL; + bufmgr_gem->fd = fd; if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { @@ -1714,7 +2111,26 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); } - if (!IS_I965G(bufmgr_gem)) { + if (IS_GEN2(bufmgr_gem)) + bufmgr_gem->gen = 2; + else if (IS_GEN3(bufmgr_gem)) + bufmgr_gem->gen = 3; + else if (IS_GEN4(bufmgr_gem)) + bufmgr_gem->gen = 4; + else + bufmgr_gem->gen = 6; + + gp.param = I915_PARAM_HAS_EXECBUF2; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (!ret) + exec2 = 1; + + gp.param = I915_PARAM_HAS_BSD; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (!ret) + has_bsd = 1; + + if (bufmgr_gem->gen < 4) { gp.param = I915_PARAM_NUM_FENCES_AVAIL; gp.value = &bufmgr_gem->available_fences; ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); @@ -1724,6 +2140,19 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); bufmgr_gem->available_fences = 0; + } else { + /* XXX The kernel reports the total number of fences, + * including any that may be pinned. + * + * We presume that there will be at least one pinned + * fence for the scanout buffer, but there may be more + * than one scanout and the user may be manually + * pinning buffers. Let's move to execbuffer2 and + * thereby forget the insanity of using fences... 
+ */ + bufmgr_gem->available_fences -= 2; + if (bufmgr_gem->available_fences < 0) + bufmgr_gem->available_fences = 0; } } @@ -1747,12 +2176,19 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; + bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; - bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; + /* Use the new one if available */ + if (exec2) { + bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; + if (has_bsd) + bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; + } else + bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; @@ -1760,15 +2196,12 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space; bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; + bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; bufmgr_gem->bufmgr.get_pipe_from_crtc_id = drm_intel_gem_get_pipe_from_crtc_id; bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; - /* Initialize the linked lists for BO reuse cache. */ - for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) { - DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); - bufmgr_gem->cache_bucket[i].size = size; - } + init_cache_buckets(bufmgr_gem); return &bufmgr_gem->bufmgr; }
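
Editor's sketches (standalone C, not part of the patch; helper names are illustrative):

1) BO cache buckets. init_cache_buckets() above replaces the old 14 power-of-two buckets (the deleted DRM_INTEL_GEM_BO_BUCKETS) with up to 14 * 4 buckets: 4 KB, 8 KB and 12 KB, then every power of two from 16 KB to 64 MB together with its 1.25x, 1.5x and 1.75x intermediates. A minimal sketch of the same ladder and of the first-fit lookup used by drm_intel_gem_bo_bucket_for_size():

#include <assert.h>
#include <stdio.h>

#define MAX_BUCKETS (14 * 4)

static unsigned long buckets[MAX_BUCKETS];
static int num_buckets;

static void add_bucket(unsigned long size)
{
	assert(num_buckets < MAX_BUCKETS);
	buckets[num_buckets++] = size;
}

static void init_buckets(void)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;

	/* 4KB, 8KB, 12KB, then size, 1.25x, 1.5x, 1.75x per power of two */
	add_bucket(4096);
	add_bucket(4096 * 2);
	add_bucket(4096 * 3);
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		add_bucket(size);
		add_bucket(size + size * 1 / 4);
		add_bucket(size + size * 2 / 4);
		add_bucket(size + size * 3 / 4);
	}
}

/* The list is ascending, so the first bucket >= size is the tightest fit. */
static long bucket_for_size(unsigned long size)
{
	int i;

	for (i = 0; i < num_buckets; i++)
		if (buckets[i] >= size)
			return (long)buckets[i];
	return -1;	/* over 64MB: not cached */
}

int main(void)
{
	init_buckets();
	/* A 4,500,000-byte surface lands in the 5,242,880-byte bucket
	 * (~17% slack); a pure power-of-two ladder would round it up to
	 * 8,388,608 bytes (~86% slack). */
	printf("%d buckets, 4500000 -> %ld\n",
	       num_buckets, bucket_for_size(4500000));
	return 0;
}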
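2) Tiled allocation sizing. drm_intel_gem_bo_alloc_tiled() above rounds the height to the tile height (2 rows untiled, 8 for X, 32 for Y), rounds the pitch to the tile width (64 bytes untiled; 512 for X and 128 for Y on gen4+), and re-runs the computation if drm_intel_gem_bo_tile_size() falls back to a different tiling mode. A sketch of the gen4+ arithmetic only, assuming the fallback loop has already settled on a mode (pre-gen4 would instead round the pitch up to a power of two):

#include <stdio.h>

enum tiling { TILING_NONE, TILING_X, TILING_Y };

#define ALIGN(v, a) (((v) + (a) - 1) & ~((unsigned long)(a) - 1))

static unsigned long tiled_size(unsigned x, unsigned y, unsigned cpp,
				enum tiling mode, unsigned long *pitch)
{
	unsigned long aligned_y, stride;

	if (mode == TILING_NONE)
		aligned_y = ALIGN(y, 2);	/* data port reads 2x2 blocks */
	else if (mode == TILING_X)
		aligned_y = ALIGN(y, 8);	/* X tile: 512 bytes x 8 rows */
	else
		aligned_y = ALIGN(y, 32);	/* Y tile: 128 bytes x 32 rows */

	stride = (unsigned long)x * cpp;
	if (mode == TILING_NONE)
		stride = ALIGN(stride, 64);	/* render-friendly linear pitch */
	else
		stride = ALIGN(stride, mode == TILING_X ? 512 : 128);

	*pitch = stride;
	if (mode == TILING_NONE)
		return stride * aligned_y;	/* page-rounded later, at alloc */
	return ALIGN(stride * aligned_y, 4096);	/* gen4+ tiling: page multiple */
}

int main(void)
{
	unsigned long pitch;
	/* 1366x768 XRGB, X-tiled: pitch 5464 -> 5632, height stays 768,
	 * size 5632 * 768 = 4325376 bytes, already a page multiple. */
	unsigned long size = tiled_size(1366, 768, 4, TILING_X, &pitch);

	printf("pitch=%lu size=%lu\n", pitch, size);
	return 0;
}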
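3) Fence-aware relocations. With do_bo_emit_reloc() above, a relocation requests a fence register only when it must: drm_intel_gem_bo_emit_reloc() passes need_fence = !fenced_relocs (so old clients that never call drm_intel_bufmgr_gem_enable_fenced_relocs() keep the conservative behaviour), drm_intel_gem_bo_emit_reloc_fence() always requests one, and the request is then dropped for untiled targets and on gen4+, which never fences for rendering. The decision, condensed into one illustrative function:

#include <stdbool.h>
#include <stdio.h>

static bool reloc_needs_fence(int gen, bool fenced_relocs_enabled,
			      bool explicit_fence_request,
			      bool target_is_tiled)
{
	/* emit_reloc passes !fenced_relocs; emit_reloc_fence passes 1 */
	bool need_fence = explicit_fence_request || !fenced_relocs_enabled;

	if (!target_is_tiled)	/* only tiled buffers need fences */
		need_fence = false;
	if (gen >= 4)		/* 965+ never fences for rendering */
		need_fence = false;
	return need_fence;
}

int main(void)
{
	printf("%d\n", reloc_needs_fence(3, false, false, true)); /* 1 */
	printf("%d\n", reloc_needs_fence(3, true, false, true));  /* 0 */
	printf("%d\n", reloc_needs_fence(4, true, true, true));   /* 0 */
	return 0;
}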
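4) Building the execbuffer2 validate list. drm_intel_gem_bo_process_reloc2() above walks the relocation tree depth-first, so every target is on the list before anything that points at it; shared targets are added once (validate_index doubles as the visited mark), self-relocations skip recursion, and the batch is appended last by drm_intel_gem_bo_mrb_exec2(), as the kernel expects. A toy version of the walk (cycles other than self-references are not handled, matching the original):

#include <stdio.h>

#define MAX_BOS 16

struct bo {
	const char *name;
	int validate_index;	/* -1 until placed on the list */
	struct bo *targets[4];	/* relocation targets */
	int n_targets;
};

static struct bo *list[MAX_BOS];
static int count;

static void add_to_validate_list(struct bo *b)
{
	if (b->validate_index != -1)
		return;		/* shared targets are listed only once */
	b->validate_index = count;
	list[count++] = b;
}

static void process_relocs(struct bo *b)
{
	int i;

	for (i = 0; i < b->n_targets; i++) {
		struct bo *t = b->targets[i];

		if (t == b)
			continue;	/* self-reloc: no recursion */
		process_relocs(t);	/* depth-first: targets go first */
		add_to_validate_list(t);
	}
}

int main(void)
{
	struct bo tex = { "texture", -1, { 0 }, 0 };
	struct bo surf = { "surface-state", -1, { &tex }, 1 };
	struct bo batch = { "batch", -1, { &surf, &tex }, 2 };
	int i;

	process_relocs(&batch);
	add_to_validate_list(&batch);	/* nothing relocates to the batch */
	for (i = 0; i < count; i++)
		printf("%d: %s\n", i, list[i]->name);
	/* prints: 0: texture  1: surface-state  2: batch */
	return 0;
}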
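5) ioctl error convention. Throughout the patch, call sites gain the same two-part idiom: restart the ioctl if a signal interrupted it, and return a negative errno rather than a bare 0 or -1, so callers can tell ENOMEM, ENOSPC and EINVAL apart. As a helper it would look like the sketch below; libdrm later centralizes exactly this (with EAGAIN also retried) as drmIoctl(), and the name intel_ioctl here is purely illustrative. Usage would be, e.g., ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);

#include <errno.h>
#include <sys/ioctl.h>

int intel_ioctl(int fd, unsigned long request, void *arg)
{
	int ret;

	do {
		ret = ioctl(fd, request, arg);
	} while (ret == -1 && errno == EINTR);	/* interrupted: retry */

	return ret == -1 ? -errno : 0;		/* normalize to -errno */
}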
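6) Client view. Putting the new entry points together: a fence-aware client enables reuse and fenced relocs right after initializing the buffer manager, and tiled buffers now get their tiling and stride applied inside the (cache-aware) allocator instead of via a separate set_tiling call. A sketch, with error handling omitted; the device path is an assumption, and the program must be linked against libdrm_intel:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch;
	drm_intel_bo *bo;

	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	/* A no-op unless the execbuffer2 path was selected at init: */
	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);

	/* Tiling may be downgraded by the allocator, so check it after. */
	bo = drm_intel_bo_alloc_tiled(bufmgr, "scanout", 1366, 768, 4,
				      &tiling, &pitch, 0);
	printf("tiling=%u pitch=%lu\n", tiling, pitch);

	drm_intel_bo_unreference(bo);
	drm_intel_bufmgr_destroy(bufmgr);
	return 0;
}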