From 61caf797aeb88af42ea0d333ad3f6ba88468d37f Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 23 Jun 2008 00:53:53 -0700 Subject: [PATCH] [intel-gem] pwrite through GTT Pin/copy_from_user/unpin through the GTT to eliminate clflush costs. Benchmarks say this helps quite a bit. --- linux-core/i915_gem.c | 158 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 125 insertions(+), 33 deletions(-) diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c index 4361b06..4dfbd2a 100644 --- a/linux-core/i915_gem.c +++ b/linux-core/i915_gem.c @@ -36,7 +36,14 @@ #define WATCH_LRU 0 #define WATCH_RELOC 0 #define WATCH_INACTIVE 0 +#define WATCH_PWRITE 0 +#if WATCH_BUF | WATCH_EXEC | WATCH_PWRITE +static void +i915_gem_dump_object(struct drm_gem_object *obj, int len, + const char *where, uint32_t mark); +#endif + static int i915_gem_object_set_domain(struct drm_gem_object *obj, uint32_t read_domains, @@ -154,6 +161,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return 0; } +#include "drm_compat.h" + /** * Writes data to the object referenced by handle. * @@ -165,41 +174,121 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_pwrite *args = data; struct drm_gem_object *obj; - ssize_t written; + struct drm_i915_gem_object *obj_priv; + ssize_t remain; loff_t offset; - int ret; + char __user *user_data; + char *vaddr; + int i, o, l; + int ret = 0; + unsigned long pfn; + unsigned long unwritten; obj = drm_gem_object_lookup(dev, file_priv, args->handle); if (obj == NULL) return -EINVAL; + /** Bounds check destination. + * + * XXX: This could use review for overflow issues... + */ + if (args->offset > obj->size || args->size > obj->size || + args->offset + args->size > obj->size) + return -EFAULT; + + user_data = (char __user *) (uintptr_t) args->data_ptr; + remain = args->size; + if (!access_ok(VERIFY_READ, user_data, remain)) + return -EFAULT; + + mutex_lock(&dev->struct_mutex); - ret = i915_gem_set_domain(obj, file_priv, - I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + ret = i915_gem_object_pin(obj, 0); if (ret) { drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return ret; } - offset = args->offset; + ret = i915_gem_set_domain(obj, file_priv, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + if (ret) + goto fail; + + obj_priv = obj->driver_private; + offset = obj_priv->gtt_offset + args->offset; + obj_priv->dirty = 1; + + while (remain > 0) { + + /** Operation in this page + * + * i = page number + * o = offset within page + * l = bytes to copy + */ + i = offset >> PAGE_SHIFT; + o = offset & (PAGE_SIZE-1); + l = remain; + if ((o + l) > PAGE_SIZE) + l = PAGE_SIZE - o; + + pfn = (dev->agp->base >> PAGE_SHIFT) + i; + +#ifdef DRM_KMAP_ATOMIC_PROT_PFN + /* kmap_atomic can't map IO pages on non-HIGHMEM kernels + */ + vaddr = kmap_atomic_prot_pfn(pfn, KM_USER0, + __pgprot(__PAGE_KERNEL)); +#if WATCH_PWRITE + DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n", + i, o, l, pfn, vaddr); +#endif + unwritten = __copy_from_user_inatomic_nocache(vaddr + o, user_data, l); + kunmap_atomic(vaddr, KM_USER0); - written = vfs_write(obj->filp, - (char __user *)(uintptr_t) args->data_ptr, - args->size, &offset); + if (unwritten) +#endif + { + vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); +#if WATCH_PWRITE + DRM_INFO("pwrite slow i %d o %d l %d pfn %ld vaddr %p\n", + i, o, l, pfn, vaddr); +#endif + if (vaddr == NULL) { + ret = -EFAULT; + goto fail; + } + unwritten = __copy_from_user(vaddr + o, user_data, l); +#if WATCH_PWRITE + DRM_INFO("unwritten %ld\n", unwritten); +#endif + iounmap(vaddr); + if (unwritten) { + ret = -EFAULT; + goto fail; + } + } - if (written != args->size) { - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - if (written < 0) - return written; - else - return -EINVAL; + remain -= l; + user_data += l; + offset += l; } +#if WATCH_PWRITE && 1 + i915_gem_clflush_object(obj); + i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0); + i915_gem_clflush_object(obj); +#endif +fail: + i915_gem_object_unpin (obj); drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); - return 0; +#if WATCH_PWRITE + if (ret) + DRM_INFO("pwrite failed %d\n", ret); +#endif + return ret; } /** @@ -361,7 +450,7 @@ i915_verify_inactive(struct drm_device *dev, char *file, int line) list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { obj = obj_priv->obj; - if (obj_priv->pin_count || obj_priv->active || (obj->write_domain & ~I915_GEM_DOMAIN_CPU)) + if (obj_priv->pin_count || obj_priv->active || (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) DRM_ERROR("inactive %p (p %d a %d w %x) %s:%d\n", obj, obj_priv->pin_count, obj_priv->active, obj->write_domain, file, line); @@ -644,7 +733,7 @@ i915_gem_flush(struct drm_device *dev, if (flush_domains & I915_GEM_DOMAIN_CPU) drm_agp_chipset_flush(dev); - if ((invalidate_domains|flush_domains) & ~I915_GEM_DOMAIN_CPU) { + if ((invalidate_domains|flush_domains) & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) { /* * read/write caches: * @@ -712,7 +801,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) /* If there are writes queued to the buffer, flush and * create a new seqno to wait for. */ - if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU)) { + if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) { uint32_t write_domain = obj->write_domain; #if WATCH_BUF DRM_INFO("%s: flushing object %p from write domain %08x\n", @@ -798,11 +887,13 @@ i915_gem_object_unbind(struct drm_gem_object *obj) i915_gem_object_free_page_list(obj); - atomic_dec(&dev->gtt_count); - atomic_sub(obj->size, &dev->gtt_memory); - - drm_memrange_put_block(obj_priv->gtt_space); - obj_priv->gtt_space = NULL; + if (obj_priv->gtt_space) { + atomic_dec(&dev->gtt_count); + atomic_sub(obj->size, &dev->gtt_memory); + + drm_memrange_put_block(obj_priv->gtt_space); + obj_priv->gtt_space = NULL; + } /* Remove ourselves from the LRU list if present. */ if (!list_empty(&obj_priv->list)) @@ -811,7 +902,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return 0; } -#if WATCH_BUF | WATCH_EXEC +#if WATCH_BUF | WATCH_EXEC | WATCH_PWRITE static void i915_gem_dump_page(struct page *page, uint32_t start, uint32_t end, uint32_t bias, uint32_t mark) @@ -1105,8 +1196,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) * wasn't in the GTT, there shouldn't be any way it could have been in * a GPU cache */ - BUG_ON(obj->read_domains & ~I915_GEM_DOMAIN_CPU); - BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); + BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); + BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); return 0; } @@ -1289,7 +1380,7 @@ i915_gem_object_set_domain(struct drm_gem_object *obj, * flushed before the cpu cache is invalidated */ if ((invalidate_domains & I915_GEM_DOMAIN_CPU) && - (flush_domains & ~I915_GEM_DOMAIN_CPU)) { + (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))) { ret = i915_gem_object_wait_rendering(obj); if (ret) return ret; @@ -1911,7 +2002,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) if (obj_priv->pin_count == 1) { atomic_inc(&dev->pin_count); atomic_add(obj->size, &dev->pin_memory); - if (!obj_priv->active && (obj->write_domain & ~I915_GEM_DOMAIN_CPU) == 0 && + if (!obj_priv->active && (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) == 0 && !list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); } @@ -1937,7 +2028,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj) * the inactive list */ if (obj_priv->pin_count == 0) { - if (!obj_priv->active && (obj->write_domain & ~I915_GEM_DOMAIN_CPU) == 0) + if (!obj_priv->active && (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) == 0) list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); atomic_dec(&dev->pin_count); @@ -2096,7 +2187,7 @@ i915_gem_set_domain(struct drm_gem_object *obj, return ret; flush_domains = i915_gem_dev_set_domain(obj->dev); - if (flush_domains & ~I915_GEM_DOMAIN_CPU) + if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) (void) i915_add_request(dev, flush_domains); return 0; @@ -2154,8 +2245,9 @@ i915_gem_idle(struct drm_device *dev) /* Flush the GPU along with all non-CPU write domains */ - i915_gem_flush(dev, ~I915_GEM_DOMAIN_CPU, ~I915_GEM_DOMAIN_CPU); - seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU); + i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), + ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); + seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); if (seqno == 0) { mutex_unlock(&dev->struct_mutex); -- 2.7.4