From 9af4c497433398fa4576a7c1c31036448cf4f24c Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 8 May 2008 10:44:02 -0700 Subject: [PATCH] [intel-gem] Move domains to relocation records. add set_domain ioctl. Domain information is about buffer relationships, not buffer contents. That means a relocation contains the domain information as it knows how the source buffer references the target buffer. This also adds the set_domain ioctl so that user space can move buffers to the cpu domain. --- linux-core/drmP.h | 18 ++++ linux-core/drm_drv.c | 1 + linux-core/drm_gem.c | 35 +++++++- linux-core/i915_drv.c | 1 + linux-core/i915_gem.c | 235 +++++++++++++++++++++++++++++++++++-------------- shared-core/i915_drm.h | 28 +++--- shared-core/i915_drv.h | 3 + 7 files changed, 238 insertions(+), 83 deletions(-) diff --git a/linux-core/drmP.h b/linux-core/drmP.h index cdeecc3..11688cd 100644 --- a/linux-core/drmP.h +++ b/linux-core/drmP.h @@ -652,6 +652,15 @@ struct drm_gem_object { uint32_t read_domains; uint32_t write_domain; + /** + * While validating an exec operation, the + * new read/write domain values are computed here. 
+ * They will be transferred to the above values + * at the point that any cache flushing occurs + */ + uint32_t pending_read_domains; + uint32_t pending_write_domain; + void *driver_private; }; @@ -765,6 +774,13 @@ struct drm_driver { int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); + /** + * Driver-specific callback to set memory domains from userspace + */ + int (*gem_set_domain) (struct drm_gem_object *obj, + uint32_t read_domains, + uint32_t write_domain); + struct drm_fence_driver *fence_driver; struct drm_bo_driver *bo_driver; @@ -1392,6 +1408,8 @@ int drm_gem_name_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int drm_gem_set_domain_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); void drm_gem_open(struct drm_device *dev, struct drm_file *file_private); void drm_gem_release(struct drm_device *dev, struct drm_file *file_private); diff --git a/linux-core/drm_drv.c b/linux-core/drm_drv.c index 4c85cdf..16c38db 100644 --- a/linux-core/drm_drv.c +++ b/linux-core/drm_drv.c @@ -158,6 +158,7 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_GEM_MMAP, drm_gem_mmap_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_GEM_NAME, drm_gem_name_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_IOCTL_GEM_SET_DOMAIN, drm_gem_set_domain_ioctl, DRM_AUTH), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/linux-core/drm_gem.c b/linux-core/drm_gem.c index 4eaeffc..3673c93 100644 --- a/linux-core/drm_gem.c +++ b/linux-core/drm_gem.c @@ -325,10 +325,6 @@ drm_gem_mmap_ioctl(struct drm_device *dev, void *data, if (IS_ERR((void *)addr)) return addr; - /* XXX hack until we have a driver callback to make this work */ - obj->read_domains = DRM_GEM_DOMAIN_CPU; - obj->write_domain = DRM_GEM_DOMAIN_CPU; - args->addr_ptr 
= (uint64_t) addr; return 0; @@ -461,6 +457,37 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data, } /** + * Called when user space prepares to use an object + */ +int +drm_gem_set_domain_ioctl (struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_gem_set_domain *args = data; + struct drm_gem_object *obj; + int ret; + + if (!(dev->driver->driver_features & DRIVER_GEM)) + return -ENODEV; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return -EINVAL; + + if (dev->driver->gem_set_domain) { + ret = dev->driver->gem_set_domain (obj, + args->read_domains, + args->write_domain); + } else { + obj->read_domains = args->read_domains; + obj->write_domain = args->write_domain; + ret = 0; + } + drm_gem_object_unreference (obj); + return ret; +} + +/** * Called at device open time, sets up the structure for handling refcounting * of mm objects. */ diff --git a/linux-core/i915_drv.c b/linux-core/i915_drv.c index 3e788d2..ae8cf3e 100644 --- a/linux-core/i915_drv.c +++ b/linux-core/i915_drv.c @@ -588,6 +588,7 @@ static struct drm_driver driver = { .ioctls = i915_ioctls, .gem_init_object = i915_gem_init_object, .gem_free_object = i915_gem_free_object, + .gem_set_domain = i915_gem_set_domain_ioctl, .fops = { .owner = THIS_MODULE, .open = drm_open, diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c index 1d55eaa..861e7bb 100644 --- a/linux-core/i915_gem.c +++ b/linux-core/i915_gem.c @@ -33,6 +33,7 @@ #define WATCH_BUF 0 #define WATCH_EXEC 0 #define WATCH_LRU 0 +#define WATCH_RELOC 0 int i915_gem_init_ioctl(struct drm_device *dev, void *data, @@ -75,54 +76,61 @@ i915_gem_object_free_page_list(struct drm_gem_object *obj) } static void -i915_gem_flush(struct drm_device *dev, uint32_t domains) +i915_gem_flush(struct drm_device *dev, uint32_t invalidate_domains, uint32_t flush_domains) { drm_i915_private_t *dev_priv = dev->dev_private; uint32_t cmd; RING_LOCALS; #if WATCH_EXEC - DRM_INFO ("%s: flush %08x\n", 
__FUNCTION__, domains); + DRM_INFO ("%s: invalidate %08x flush %08x\n", __FUNCTION__, + invalidate_domains, flush_domains); #endif - /* read/write caches: - * DRM_GEM_DOMAIN_I915_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is also - * flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * DRM_GEM_DOMAIN_I915_SAMPLER is flushed on pre-965 if MI_READ_FLUSH - * is set, and is always flushed on 965. - * DRM_GEM_DOMAIN_I915_COMMAND may not exist? - * DRM_GEM_DOMAIN_I915_INSTRUCTION, which exists on 965, is invalidated - * when MI_EXE_FLUSH is set. - * DRM_GEM_DOMAIN_I915_VERTEX, which exists on 965, is invalidated with - * every MI_FLUSH. - * - * TLBs: - * On 965, TLBs associated with DRM_GEM_DOMAIN_I915_COMMAND and - * DRM_GEM_DOMAIN_CPU in are invalidated at PTE write and - * DRM_GEM_DOMAIN_I915_RENDER and DRM_GEM_DOMAIN_I915_SAMPLER are - * flushed at any MI_FLUSH. - */ - - cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH; - if (domains & DRM_GEM_DOMAIN_I915_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (!IS_I965G(dev)) { - /* On the 965, the sampler cache always gets flushed and this - * bit is reserved. + if (flush_domains & DRM_GEM_DOMAIN_CPU) + drm_agp_chipset_flush(dev); + + if ((invalidate_domains|flush_domains) & ~DRM_GEM_DOMAIN_CPU) + { + /* read/write caches: + * DRM_GEM_DOMAIN_I915_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is also + * flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * DRM_GEM_DOMAIN_I915_SAMPLER is flushed on pre-965 if MI_READ_FLUSH + * is set, and is always flushed on 965. + * DRM_GEM_DOMAIN_I915_COMMAND may not exist? + * DRM_GEM_DOMAIN_I915_INSTRUCTION, which exists on 965, is invalidated + * when MI_EXE_FLUSH is set. + * DRM_GEM_DOMAIN_I915_VERTEX, which exists on 965, is invalidated with + * every MI_FLUSH. 
+ * + * TLBs: + * On 965, TLBs associated with DRM_GEM_DOMAIN_I915_COMMAND and + * DRM_GEM_DOMAIN_CPU are invalidated at PTE write and + * DRM_GEM_DOMAIN_I915_RENDER and DRM_GEM_DOMAIN_I915_SAMPLER are + * flushed at any MI_FLUSH. */ - if (domains & DRM_GEM_DOMAIN_I915_SAMPLER) - cmd |= MI_READ_FLUSH; + + cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH; + if ((invalidate_domains|flush_domains) & DRM_GEM_DOMAIN_I915_RENDER) + cmd &= ~MI_NO_WRITE_FLUSH; + if (!IS_I965G(dev)) { + /* On the 965, the sampler cache always gets flushed and this + * bit is reserved. + */ + if (invalidate_domains & DRM_GEM_DOMAIN_I915_SAMPLER) + cmd |= MI_READ_FLUSH; + } + if (invalidate_domains & DRM_GEM_DOMAIN_I915_INSTRUCTION) + cmd |= MI_EXE_FLUSH; + + BEGIN_LP_RING(2); + OUT_RING(cmd); + OUT_RING(0); /* noop */ + ADVANCE_LP_RING(); } - if (domains & DRM_GEM_DOMAIN_I915_INSTRUCTION) - cmd |= MI_EXE_FLUSH; - - BEGIN_LP_RING(2); - OUT_RING(cmd); - OUT_RING(0); /* noop */ - ADVANCE_LP_RING(); } /** @@ -145,8 +153,10 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) DRM_INFO ("%s: flushing object %p from write domain %08x\n", __FUNCTION__, obj, obj->write_domain); #endif - i915_gem_flush (dev, obj->write_domain); + i915_gem_flush (dev, 0, obj->write_domain); obj->write_domain = 0; + if (obj_priv->last_rendering_cookie == 0) + drm_gem_object_reference (obj); obj_priv->last_rendering_cookie = i915_emit_irq (dev); } /* If there is rendering queued on the buffer being evicted, wait for @@ -162,6 +172,9 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) return ret; /* Clear it now that we know it's passed. 
*/ obj_priv->last_rendering_cookie = 0; + + /* The cookie held a reference to the object, release that now */ + drm_gem_object_unreference (obj); } return 0; @@ -194,10 +207,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) drm_memrange_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; if (!list_empty (&obj_priv->gtt_lru_entry)) - { list_del_init(&obj_priv->gtt_lru_entry); - drm_gem_object_unreference (obj); - } } #if WATCH_BUF | WATCH_EXEC @@ -403,11 +413,9 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) static void i915_gem_clflush_object (struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; drm_ttm_cache_flush (obj_priv->page_list, obj->size / PAGE_SIZE); - drm_agp_chipset_flush(dev); } /* @@ -463,8 +471,32 @@ i915_gem_object_set_domain (struct drm_gem_object *obj, obj->write_domain = write_domain; obj->read_domains = read_domains; - dev->invalidate_domains |= invalidate_domains & ~DRM_GEM_DOMAIN_CPU; - dev->flush_domains |= flush_domains & ~DRM_GEM_DOMAIN_CPU; + dev->invalidate_domains |= invalidate_domains; + dev->flush_domains |= flush_domains; +} + +/** + * Once all of the objects have been set in the proper domain, + * perform the necessary flush and invalidate operations + */ + +static void +i915_gem_dev_set_domain (struct drm_device *dev) +{ + /* + * Now that all the buffers are synced to the proper domains, + * flush and invalidate the collected domains + */ + if (dev->invalidate_domains | dev->flush_domains) + { +#if WATCH_EXEC + DRM_INFO ("%s: invalidate_domains %08x flush_domains %08x\n", + __FUNCTION__, dev->invalidate_domains, dev->flush_domains); +#endif + i915_gem_flush (dev, dev->invalidate_domains, dev->flush_domains); + dev->invalidate_domains = 0; + dev->flush_domains = 0; + } } static int @@ -488,17 +520,13 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj, return -ENOMEM; } - /* Do domain migration */ - 
i915_gem_object_set_domain (obj, entry->read_domains, entry->write_domain); - entry->buffer_offset = obj_priv->gtt_offset; if (obj_priv->pin_count == 0) { /* Move our buffer to the head of the LRU. */ - if (list_empty (&obj_priv->gtt_lru_entry)) { - drm_gem_object_reference (obj); + if (list_empty (&obj_priv->gtt_lru_entry)) list_add_tail(&obj_priv->gtt_lru_entry, &dev_priv->mm.gtt_lru); - } else + else list_move_tail(&obj_priv->gtt_lru_entry, &dev_priv->mm.gtt_lru); #if WATCH_LRU && 0 i915_dump_lru (dev, __FUNCTION__); @@ -536,16 +564,44 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj, } if (reloc.offset > obj->size - 4) { - DRM_ERROR("Relocation beyond object bounds.\n"); + DRM_ERROR("Relocation beyond object bounds: obj %p target %d offset %d size %d.\n", + obj, reloc.target_handle, (int) reloc.offset, (int) obj->size); drm_gem_object_unreference (target_obj); return -EINVAL; } if (reloc.offset & 3) { - DRM_ERROR("Relocation not 4-byte aligned.\n"); + DRM_ERROR("Relocation not 4-byte aligned: obj %p target %d offset %d.\n", + obj, reloc.target_handle, (int) reloc.offset); drm_gem_object_unreference (target_obj); return -EINVAL; } + if (reloc.write_domain && target_obj->pending_write_domain && + reloc.write_domain != target_obj->pending_write_domain) + { + DRM_ERROR("Write domain conflict: obj %p target %d offset %d new %08x old %08x\n", + obj, reloc.target_handle, (int) reloc.offset, + reloc.write_domain, target_obj->pending_write_domain); + drm_gem_object_unreference (target_obj); + return -EINVAL; + } + +#if WATCH_RELOC + DRM_INFO ("%s: obj %p offset %08x target %d read %08x write %08x gtt %08x presumed %08x delta %08x\n", + __FUNCTION__, + obj, + (int) reloc.offset, + (int) reloc.target_handle, + (int) reloc.read_domains, + (int) reloc.write_domain, + (int) target_obj_priv->gtt_offset, + (int) reloc.presumed_offset, + reloc.delta); +#endif + + target_obj->pending_read_domains |= reloc.read_domains; + target_obj->pending_write_domain |= 
reloc.write_domain; + /* If the relocation already has the right value in it, no * more work needs to be done. */ @@ -558,6 +614,16 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj, */ i915_gem_object_wait_rendering(obj); + /* As we're writing through the gtt, flush + * any CPU writes before we write the relocations + */ + if (obj->write_domain & DRM_GEM_DOMAIN_CPU) + { + i915_gem_clflush_object (obj); + drm_agp_chipset_flush(dev); + obj->write_domain = 0; + } + /* Map the page containing the relocation we're going to * perform. */ @@ -672,6 +738,19 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, LOCK_TEST_WITH_RETURN(dev, file_priv); +#if 0 + /* + * XXX wait for previous rendering to complete as we otherwise never + * flush the LRU list + */ + { + drm_i915_private_t *dev_priv = dev->dev_private; + + while (!list_empty (&dev_priv->mm.gtt_lru)) + i915_gem_evict_something (dev); + } +#endif + #if WATCH_EXEC DRM_INFO ("buffers_ptr %d buffer_count %d len %08x\n", (int) args->buffers_ptr, args->buffer_count, args->batch_len); @@ -717,6 +796,10 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } } + /* Set the pending read domains for the batch buffer to COMMAND */ + object_list[args->buffer_count-1]->pending_read_domains = DRM_GEM_DOMAIN_I915_COMMAND; + object_list[args->buffer_count-1]->pending_write_domain = 0; + for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -730,22 +813,19 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, ret = -ENOMEM; goto err; } - } - if (dev->invalidate_domains | dev->flush_domains) - { -#if WATCH_EXEC - DRM_INFO ("%s: invalidate_domains %08x flush_domains %08x\n", - __FUNCTION__, dev->invalidate_domains, dev->flush_domains); -#endif - i915_gem_flush (dev, dev->invalidate_domains | dev->flush_domains); - dev->invalidate_domains = 0; - dev->flush_domains = 0; + /* make sure all previous memory 
operations have passed */ + i915_gem_object_set_domain (obj, + obj->pending_read_domains, + obj->pending_write_domain); + obj->pending_read_domains = 0; + obj->pending_write_domain = 0; } - exec_offset = validate_list[args->buffer_count - 1].buffer_offset; + /* Flush/invalidate caches and chipset buffer */ + i915_gem_dev_set_domain (dev); - /* make sure all previous memory operations have passed */ + exec_offset = validate_list[args->buffer_count - 1].buffer_offset; #if WATCH_EXEC i915_gem_dump_object (object_list[args->buffer_count - 1], @@ -773,6 +853,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_gem_object *obj = object_list[i]; struct drm_i915_gem_object *obj_priv = obj->driver_private; + /* + * Have the cookie hold a reference to this object + * which is freed when the object is waited for + */ + if (obj_priv->last_rendering_cookie == 0) + drm_gem_object_reference (obj); obj_priv->last_rendering_cookie = cookie; } @@ -789,6 +875,13 @@ err: for (i = 0; i < args->buffer_count; i++) drm_gem_object_unreference(object_list[i]); } + + /* XXX kludge for now as we don't clean the exec ring yet */ + if (object_list != NULL) { + for (i = 0; i < args->buffer_count; i++) + i915_gem_object_wait_rendering (object_list[i]); + } + drm_free(object_list, sizeof(*object_list) * args->buffer_count, DRM_MEM_DRIVER); drm_free(validate_list, sizeof(*validate_list) * args->buffer_count, @@ -873,3 +966,13 @@ void i915_gem_free_object(struct drm_gem_object *obj) drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); } + +int +i915_gem_set_domain_ioctl (struct drm_gem_object *obj, + uint32_t read_domains, + uint32_t write_domain) +{ + i915_gem_object_set_domain (obj, read_domains, write_domain); + i915_gem_dev_set_domain (obj->dev); + return 0; +} diff --git a/shared-core/i915_drm.h b/shared-core/i915_drm.h index 8c3cd64..d71447e 100644 --- a/shared-core/i915_drm.h +++ b/shared-core/i915_drm.h @@ -443,6 +443,20 @@ struct drm_i915_gem_relocation_entry { * the 
execbuffer ioctl when the relocation is written. */ uint64_t presumed_offset; + + /** + * Target memory domains read by this operation. + */ + uint32_t read_domains; + + /** + * Target memory domains written by this operation. + * + * Note that only one domain may be written by the whole + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ + uint32_t write_domain; }; /** @@ -451,13 +465,6 @@ struct drm_i915_gem_relocation_entry { * Most of these just align with the various caches in * the system and are used to flush and invalidate as * objects end up cached in different domains. - * - * STOLEN is a domain for the stolen memory portion of the - * address space; those pages are accessible only through the - * GTT and, hence, look a lot like VRAM on a discrete card. - * We'll allow programs to move objects into stolen memory - * mostly as a way to demonstrate the VRAM capabilities of this - * API */ /* 0x00000001 is DRM_GEM_DOMAIN_CPU */ @@ -465,8 +472,7 @@ struct drm_i915_gem_relocation_entry { #define DRM_GEM_DOMAIN_I915_SAMPLER 0x00000004 /* Sampler cache, used by texture engine */ #define DRM_GEM_DOMAIN_I915_COMMAND 0x00000008 /* Command queue, used to load batch buffers */ #define DRM_GEM_DOMAIN_I915_INSTRUCTION 0x00000010 /* Instruction cache, used by shader programs */ -#define DRM_GEM_DOMAIN_I915_STOLEN 0x00000020 /* Stolen memory, needed by some objects */ -#define DRM_GEM_DOMAIN_I915_VERTEX 0x00000040 /* Vertex address cache */ +#define DRM_GEM_DOMAIN_I915_VERTEX 0x00000020 /* Vertex address cache */ struct drm_i915_gem_validate_entry { /** @@ -482,10 +488,6 @@ struct drm_i915_gem_validate_entry { /** Required alignment in graphics aperture */ uint64_t alignment; - /** Memory domains used in this execbuffer run */ - uint32_t read_domains; - uint32_t write_domain; - /** * Returned value of the updated offset of the buffer, for future * presumed_offset writes. 
diff --git a/shared-core/i915_drv.h b/shared-core/i915_drv.h index daa77f7..96257ab 100644 --- a/shared-core/i915_drv.h +++ b/shared-core/i915_drv.h @@ -383,6 +383,9 @@ int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_init_object(struct drm_gem_object *obj); void i915_gem_free_object(struct drm_gem_object *obj); +int i915_gem_set_domain_ioctl (struct drm_gem_object *obj, + uint32_t read_domains, + uint32_t write_domain); #endif -- 2.7.4