From 76446cac68568fc7f5168a27deaf803ed22a4360 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 17 Dec 2009 22:05:42 -0500 Subject: [PATCH] drm/i915: execbuf2 support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch adds a new execbuf ioctl, execbuf2, for use by clients that want to control fence register allocation more finely. The buffer passed in to the new ioctl includes a new relocation type to indicate whether a given object needs a fence register assigned for the command buffer in question. Compatibility with the existing execbuf ioctl is implemented in terms of the new code, preserving the assumption that fence registers are required for pre-965 rendering commands. Signed-off-by: Jesse Barnes [ickle: Remove pre-emptive clear_fence_reg()] Signed-off-by: Chris Wilson Signed-off-by: Kristian Høgsberg [anholt: Removed dmesg spam] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_gem.c | 239 +++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_tiling.c | 46 +++---- include/drm/i915_drm.h | 54 ++++++++ 5 files changed, 273 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e3e5d50..d67be65 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -813,9 +813,13 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_PAGEFLIPPING: value = 1; break; + case I915_PARAM_HAS_EXECBUF2: + /* depends on GEM */ + value = dev_priv->has_gem; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", - param->param); + param->param); return -EINVAL; } @@ -1646,6 +1650,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), + DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH), DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9a05f1a..7eb4ad5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -815,6 +815,8 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -881,6 +883,9 @@ void i915_gem_shrinker_exit(void); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, + int tiling_mode); +bool i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9e81a0d..0330c3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3199,7 +3199,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, static int i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_file *file_priv, - struct drm_i915_gem_exec_object *entry, + struct drm_i915_gem_exec_object2 *entry, struct drm_i915_gem_relocation_entry *relocs) { struct drm_device *dev = obj->dev; @@ -3207,12 +3207,35 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_i915_gem_object *obj_priv = obj->driver_private; int i, ret; void __iomem *reloc_page; + bool need_fence; + + need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj_priv->tiling_mode != I915_TILING_NONE; + + /* Check fence reg constraints and rebind if necessary */ + if (need_fence && !i915_obj_fenceable(dev, obj)) + i915_gem_object_unbind(obj); /* Choose the GTT offset for our buffer and put it there. */ ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); if (ret) return ret; + /* + * Pre-965 chips need a fence register set up in order to + * properly handle blits to/from tiled surfaces. + */ + if (need_fence) { + ret = i915_gem_object_get_fence_reg(obj); + if (ret != 0) { + if (ret != -EBUSY && ret != -ERESTARTSYS) + DRM_ERROR("Failure to install fence: %d\n", + ret); + i915_gem_object_unpin(obj); + return ret; + } + } + entry->offset = obj_priv->gtt_offset; /* Apply the relocations, using the GTT aperture to avoid cache @@ -3374,7 +3397,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, */ static int i915_dispatch_gem_execbuffer(struct drm_device *dev, - struct drm_i915_gem_execbuffer *exec, + struct drm_i915_gem_execbuffer2 *exec, struct drm_clip_rect *cliprects, uint64_t exec_offset) { @@ -3464,7 +3487,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) } static int -i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry **relocs) { @@ -3479,8 +3502,10 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); - if (*relocs == NULL) + if (*relocs == NULL) { + DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); return -ENOMEM; + } for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; @@ -3504,7 +3529,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } static int -i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry *relocs) { @@ -3537,7 +3562,7 @@ err: } static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, +i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, uint64_t exec_offset) { uint32_t exec_start, exec_len; @@ -3590,18 +3615,18 @@ i915_gem_wait_for_pending_flip(struct drm_device *dev, } int -i915_gem_execbuffer(struct drm_device *dev, void *data, - struct drm_file *file_priv) +i915_gem_do_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec_list) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_gem_object **object_list = NULL; struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; struct drm_i915_gem_relocation_entry *relocs; - int ret, ret2, i, pinned = 0; + int ret = 0, ret2, i, pinned = 0; uint64_t exec_offset; uint32_t seqno, flush_domains, reloc_index; int pin_tries, flips; @@ -3615,25 +3640,13 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } - /* Copy in the exec list from userland */ - exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); - if (exec_list == NULL || object_list == NULL) { - DRM_ERROR("Failed to allocate exec or object list " - "for %d buffers\n", + if (object_list == NULL) { + DRM_ERROR("Failed to allocate object list for %d buffers\n", args->buffer_count); ret = -ENOMEM; goto pre_mutex_err; } - ret = copy_from_user(exec_list, - (struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - sizeof(*exec_list) * args->buffer_count); - if (ret != 0) { - DRM_ERROR("copy %d exec entries failed %d\n", - args->buffer_count, ret); - goto pre_mutex_err; - } if (args->num_cliprects != 0) { cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), @@ -3885,20 +3898,6 @@ err: mutex_unlock(&dev->struct_mutex); - if (!ret) { - /* Copy the new buffer offsets back to the user's exec list. */ - ret = copy_to_user((struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - exec_list, - sizeof(*exec_list) * args->buffer_count); - if (ret) { - ret = -EFAULT; - DRM_ERROR("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); - } - } - /* Copy the updated relocations out regardless of current error * state. Failure to update the relocs would mean that the next * time userland calls execbuf, it would do so with presumed offset @@ -3915,12 +3914,158 @@ err: pre_mutex_err: drm_free_large(object_list); - drm_free_large(exec_list); kfree(cliprects); return ret; } +/* + * Legacy execbuffer just creates an exec2 list from the original exec object + * list array and passes it to the real function. + */ +int +i915_gem_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer *args = data; + struct drm_i915_gem_execbuffer2 exec2; + struct drm_i915_gem_exec_object *exec_list = NULL; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret, i; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + /* Copy in the exec list from userland */ + exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec_list == NULL || exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -ENOMEM; + } + ret = copy_from_user(exec_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -EFAULT; + } + + for (i = 0; i < args->buffer_count; i++) { + exec2_list[i].handle = exec_list[i].handle; + exec2_list[i].relocation_count = exec_list[i].relocation_count; + exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; + exec2_list[i].alignment = exec_list[i].alignment; + exec2_list[i].offset = exec_list[i].offset; + if (!IS_I965G(dev)) + exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; + else + exec2_list[i].flags = 0; + } + + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = 0; + + ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + for (i = 0; i < args->buffer_count; i++) + exec_list[i].offset = exec2_list[i].offset; + /* ... and back out to userspace */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec_list, + sizeof(*exec_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } else { + DRM_ERROR("i915_gem_do_execbuffer returns %d\n", ret); + } + + drm_free_large(exec_list); + drm_free_large(exec2_list); + return ret; +} + +int +i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + return -ENOMEM; + } + ret = copy_from_user(exec2_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec2_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec2_list); + return -EFAULT; + } + + ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec2_list, + sizeof(*exec2_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } + + drm_free_large(exec2_list); + return ret; +} + int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) { @@ -3934,19 +4079,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) if (ret) return ret; } - /* - * Pre-965 chips need a fence register set up in order to - * properly handle tiled surfaces. - */ - if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to install fence: %d\n", - ret); - return ret; - } - } + obj_priv->pin_count++; /* If the object is not active and not pending a flush, diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 30d6af6..df278b2 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -304,35 +304,39 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) /** - * Returns the size of the fence for a tiled object of the given size. + * Returns whether an object is currently fenceable. If not, it may need + * to be unbound and have its pitch adjusted. */ -static int -i915_get_fence_size(struct drm_device *dev, int size) +bool +i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj) { - int i; - int start; + struct drm_i915_gem_object *obj_priv = obj->driver_private; if (IS_I965G(dev)) { /* The 965 can have fences at any page boundary. */ - return ALIGN(size, 4096); + if (obj->size & 4095) + return false; + return true; + } else if (IS_I9XX(dev)) { + if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK) + return false; } else { - /* Align the size to a power of two greater than the smallest - * fence size. - */ - if (IS_I9XX(dev)) - start = 1024 * 1024; - else - start = 512 * 1024; + if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK) + return false; + } - for (i = start; i < size; i <<= 1) - ; + /* Power of two sized... */ + if (obj->size & (obj->size - 1)) + return false; - return i; - } + /* Objects must be size aligned as well */ + if (obj_priv->gtt_offset & (obj->size - 1)) + return false; + return true; } /* Check pitch constriants for all chips & tiling formats */ -static bool +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) { int tile_width; @@ -384,12 +388,6 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (stride & (stride - 1)) return false; - /* We don't 0handle the aperture area covered by the fence being bigger - * than the object size. - */ - if (i915_get_fence_size(dev, size) != size) - return false; - return true; } diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index ec3f5e80..b64a8d7 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -188,6 +188,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_MADVISE 0x26 #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -207,6 +208,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -272,6 +274,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_NUM_FENCES_AVAIL 6 #define I915_PARAM_HAS_OVERLAY 7 #define I915_PARAM_HAS_PAGEFLIPPING 8 +#define I915_PARAM_HAS_EXECBUF2 9 typedef struct drm_i915_getparam { int param; @@ -567,6 +570,57 @@ struct drm_i915_gem_execbuffer { __u64 cliprects_ptr; }; +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ + __u64 offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) + __u64 flags; + __u64 rsvd1; + __u64 rsvd2; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; + __u64 flags; /* currently unused */ + __u64 rsvd1; + __u64 rsvd2; +}; + struct drm_i915_gem_pin { /** Handle of the buffer to be pinned. */ __u32 handle; -- 2.7.4