[intel-gem] Move domains to relocation records. add set_domain ioctl.
author: Keith Packard <keithp@keithp.com>
Thu, 8 May 2008 17:44:02 +0000 (10:44 -0700)
committer: Keith Packard <keithp@keithp.com>
Thu, 8 May 2008 17:44:02 +0000 (10:44 -0700)
Domain information is about buffer relationships, not buffer contents. That
means the domain information belongs in the relocation record, since it is the
relocation that describes how the source buffer references the target buffer.

This also adds the set_domain ioctl so that user space can move buffers to
the CPU domain.

linux-core/drmP.h
linux-core/drm_drv.c
linux-core/drm_gem.c
linux-core/i915_drv.c
linux-core/i915_gem.c
shared-core/i915_drm.h
shared-core/i915_drv.h

index cdeecc3..11688cd 100644 (file)
@@ -652,6 +652,15 @@ struct drm_gem_object {
        uint32_t        read_domains;
        uint32_t        write_domain;
 
+       /**
+        * While validating an exec operation, the
+        * new read/write domain values are computed here.
+        * They will be transferred to the above values
+        * at the point that any cache flushing occurs
+        */
+       uint32_t        pending_read_domains;
+       uint32_t        pending_write_domain;
+
        void *driver_private;
 };
 
@@ -765,6 +774,13 @@ struct drm_driver {
        int (*gem_init_object) (struct drm_gem_object *obj);
        void (*gem_free_object) (struct drm_gem_object *obj);
 
+       /**
+        * Driver-specific callback to set memory domains from userspace
+        */
+       int (*gem_set_domain) (struct drm_gem_object *obj,
+                              uint32_t read_domains,
+                              uint32_t write_domain);
+
        struct drm_fence_driver *fence_driver;
        struct drm_bo_driver *bo_driver;
 
@@ -1392,6 +1408,8 @@ int drm_gem_name_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file_priv);
 int drm_gem_open_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file_priv);
+int drm_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+                            struct drm_file *file_priv);
 
 void drm_gem_open(struct drm_device *dev, struct drm_file *file_private);
 void drm_gem_release(struct drm_device *dev, struct drm_file *file_private);
index 4c85cdf..16c38db 100644 (file)
@@ -158,6 +158,7 @@ static struct drm_ioctl_desc drm_ioctls[] = {
        DRM_IOCTL_DEF(DRM_IOCTL_GEM_MMAP, drm_gem_mmap_ioctl, 0),
        DRM_IOCTL_DEF(DRM_IOCTL_GEM_NAME, drm_gem_name_ioctl, DRM_AUTH),
        DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+       DRM_IOCTL_DEF(DRM_IOCTL_GEM_SET_DOMAIN, drm_gem_set_domain_ioctl, DRM_AUTH),
 };
 
 #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
index 4eaeffc..3673c93 100644 (file)
@@ -325,10 +325,6 @@ drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
        if (IS_ERR((void *)addr))
                return addr;
        
-       /* XXX hack until we have a driver callback to make this work */
-       obj->read_domains = DRM_GEM_DOMAIN_CPU;
-       obj->write_domain = DRM_GEM_DOMAIN_CPU;
-
        args->addr_ptr = (uint64_t) addr;
 
        return 0;
@@ -461,6 +457,37 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data,
 }
 
 /**
+ * Called when user space prepares to use an object
+ */
+int
+drm_gem_set_domain_ioctl (struct drm_device *dev, void *data,
+                         struct drm_file *file_priv)
+{
+       struct drm_gem_set_domain *args = data;
+       struct drm_gem_object *obj;
+       int ret;
+
+       if (!(dev->driver->driver_features & DRIVER_GEM))
+               return -ENODEV;
+
+       obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+       if (obj == NULL)
+               return -EINVAL;
+
+       if (dev->driver->gem_set_domain) {
+               ret = dev->driver->gem_set_domain (obj,
+                                                  args->read_domains,
+                                                  args->write_domain);
+       } else {
+               obj->read_domains = args->read_domains;
+               obj->write_domain = args->write_domain;
+               ret = 0;
+       }
+       drm_gem_object_unreference (obj);
+       return ret;
+}
+
+/**
  * Called at device open time, sets up the structure for handling refcounting
  * of mm objects.
  */
index 3e788d2..ae8cf3e 100644 (file)
@@ -588,6 +588,7 @@ static struct drm_driver driver = {
        .ioctls = i915_ioctls,
        .gem_init_object = i915_gem_init_object,
        .gem_free_object = i915_gem_free_object,
+       .gem_set_domain = i915_gem_set_domain_ioctl,
        .fops = {
                .owner = THIS_MODULE,
                .open = drm_open,
index 1d55eaa..861e7bb 100644 (file)
@@ -33,6 +33,7 @@
 #define WATCH_BUF      0
 #define WATCH_EXEC     0
 #define WATCH_LRU      0
+#define WATCH_RELOC    0
 
 int
 i915_gem_init_ioctl(struct drm_device *dev, void *data,
@@ -75,54 +76,61 @@ i915_gem_object_free_page_list(struct drm_gem_object *obj)
 }
 
 static void
-i915_gem_flush(struct drm_device *dev, uint32_t domains)
+i915_gem_flush(struct drm_device *dev, uint32_t invalidate_domains, uint32_t flush_domains)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
        uint32_t        cmd;
        RING_LOCALS;
 
 #if WATCH_EXEC
-       DRM_INFO ("%s: flush %08x\n", __FUNCTION__, domains);
+       DRM_INFO ("%s: invalidate %08x flush %08x\n", __FUNCTION__,
+                 invalidate_domains, flush_domains);
 #endif
 
-       /* read/write caches:
-        * DRM_GEM_DOMAIN_I915_RENDER is always invalidated, but is
-        * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is also
-        * flushed at 2d versus 3d pipeline switches.
-        *
-        * read-only caches:
-        * DRM_GEM_DOMAIN_I915_SAMPLER is flushed on pre-965 if MI_READ_FLUSH
-        * is set, and is always flushed on 965.
-        * DRM_GEM_DOMAIN_I915_COMMAND may not exist?
-        * DRM_GEM_DOMAIN_I915_INSTRUCTION, which exists on 965, is invalidated
-        * when MI_EXE_FLUSH is set.
-        * DRM_GEM_DOMAIN_I915_VERTEX, which exists on 965, is invalidated with
-        * every MI_FLUSH.
-        *
-        * TLBs:
-        * On 965, TLBs associated with DRM_GEM_DOMAIN_I915_COMMAND and
-        * DRM_GEM_DOMAIN_CPU in are invalidated at PTE write and
-        * DRM_GEM_DOMAIN_I915_RENDER and DRM_GEM_DOMAIN_I915_SAMPLER are
-        * flushed at any MI_FLUSH.
-        */
-
-       cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH;
-       if (domains & DRM_GEM_DOMAIN_I915_RENDER)
-               cmd &= ~MI_NO_WRITE_FLUSH;
-       if (!IS_I965G(dev)) {
-               /* On the 965, the sampler cache always gets flushed and this
-                * bit is reserved.
+       if (flush_domains & DRM_GEM_DOMAIN_CPU)
+               drm_agp_chipset_flush(dev);
+               
+       if ((invalidate_domains|flush_domains) & ~DRM_GEM_DOMAIN_CPU)
+       {
+               /* read/write caches:
+                * DRM_GEM_DOMAIN_I915_RENDER is always invalidated, but is
+                * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is also
+                * flushed at 2d versus 3d pipeline switches.
+                *
+                * read-only caches:
+                * DRM_GEM_DOMAIN_I915_SAMPLER is flushed on pre-965 if MI_READ_FLUSH
+                * is set, and is always flushed on 965.
+                * DRM_GEM_DOMAIN_I915_COMMAND may not exist?
+                * DRM_GEM_DOMAIN_I915_INSTRUCTION, which exists on 965, is invalidated
+                * when MI_EXE_FLUSH is set.
+                * DRM_GEM_DOMAIN_I915_VERTEX, which exists on 965, is invalidated with
+                * every MI_FLUSH.
+                *
+                * TLBs:
+                * On 965, TLBs associated with DRM_GEM_DOMAIN_I915_COMMAND and
+                * DRM_GEM_DOMAIN_CPU in are invalidated at PTE write and
+                * DRM_GEM_DOMAIN_I915_RENDER and DRM_GEM_DOMAIN_I915_SAMPLER are
+                * flushed at any MI_FLUSH.
                 */
-               if (domains & DRM_GEM_DOMAIN_I915_SAMPLER)
-                       cmd |= MI_READ_FLUSH;
+       
+               cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH;
+               if ((invalidate_domains|flush_domains) & DRM_GEM_DOMAIN_I915_RENDER)
+                       cmd &= ~MI_NO_WRITE_FLUSH;
+               if (!IS_I965G(dev)) {
+                       /* On the 965, the sampler cache always gets flushed and this
+                        * bit is reserved.
+                        */
+                       if (invalidate_domains & DRM_GEM_DOMAIN_I915_SAMPLER)
+                               cmd |= MI_READ_FLUSH;
+               }
+               if (invalidate_domains & DRM_GEM_DOMAIN_I915_INSTRUCTION)
+                       cmd |= MI_EXE_FLUSH;
+       
+               BEGIN_LP_RING(2);
+               OUT_RING(cmd);
+               OUT_RING(0); /* noop */
+               ADVANCE_LP_RING();
        }
-       if (domains & DRM_GEM_DOMAIN_I915_INSTRUCTION)
-               cmd |= MI_EXE_FLUSH;
-
-       BEGIN_LP_RING(2);
-       OUT_RING(cmd);
-       OUT_RING(0); /* noop */
-       ADVANCE_LP_RING();
 }
 
 /**
@@ -145,8 +153,10 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
                DRM_INFO ("%s: flushing object %p from write domain %08x\n",
                          __FUNCTION__, obj, obj->write_domain);
 #endif
-               i915_gem_flush (dev, obj->write_domain);
+               i915_gem_flush (dev, 0, obj->write_domain);
                obj->write_domain = 0;
+               if (obj_priv->last_rendering_cookie == 0)
+                       drm_gem_object_reference (obj);
                obj_priv->last_rendering_cookie = i915_emit_irq (dev);
        }
        /* If there is rendering queued on the buffer being evicted, wait for
@@ -162,6 +172,9 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
                        return ret;
                /* Clear it now that we know it's passed. */
                obj_priv->last_rendering_cookie = 0;
+               
+               /* The cookie held a reference to the object, release that now */
+               drm_gem_object_unreference (obj);
        }
 
        return 0;
@@ -194,10 +207,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
        drm_memrange_put_block(obj_priv->gtt_space);
        obj_priv->gtt_space = NULL;
        if (!list_empty (&obj_priv->gtt_lru_entry))
-       {
                list_del_init(&obj_priv->gtt_lru_entry);
-               drm_gem_object_unreference (obj);
-       }
 }
 
 #if WATCH_BUF | WATCH_EXEC
@@ -403,11 +413,9 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 static void
 i915_gem_clflush_object (struct drm_gem_object *obj)
 {
-       struct drm_device               *dev = obj->dev;
        struct drm_i915_gem_object      *obj_priv = obj->driver_private;
 
        drm_ttm_cache_flush (obj_priv->page_list, obj->size / PAGE_SIZE);
-       drm_agp_chipset_flush(dev);
 }
        
 /*
@@ -463,8 +471,32 @@ i915_gem_object_set_domain (struct drm_gem_object *obj,
 
        obj->write_domain = write_domain;
        obj->read_domains = read_domains;
-       dev->invalidate_domains |= invalidate_domains & ~DRM_GEM_DOMAIN_CPU;
-       dev->flush_domains |= flush_domains & ~DRM_GEM_DOMAIN_CPU;
+       dev->invalidate_domains |= invalidate_domains;
+       dev->flush_domains |= flush_domains;
+}
+
+/**
+ * Once all of the objects have been set in the proper domain,
+ * perform the necessary flush and invalidate operations
+ */
+
+static void
+i915_gem_dev_set_domain (struct drm_device *dev)
+{
+       /*
+        * Now that all the buffers are synced to the proper domains,
+        * flush and invalidate the collected domains
+        */
+       if (dev->invalidate_domains | dev->flush_domains)
+       {
+#if WATCH_EXEC
+               DRM_INFO ("%s: invalidate_domains %08x flush_domains %08x\n",
+                         __FUNCTION__, dev->invalidate_domains, dev->flush_domains);
+#endif
+               i915_gem_flush (dev, dev->invalidate_domains, dev->flush_domains);
+               dev->invalidate_domains = 0;
+               dev->flush_domains = 0;
+       }
 }
 
 static int
@@ -488,17 +520,13 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
                        return -ENOMEM;
        }
 
-       /* Do domain migration */
-       i915_gem_object_set_domain (obj, entry->read_domains, entry->write_domain);
-
        entry->buffer_offset = obj_priv->gtt_offset;
 
        if (obj_priv->pin_count == 0) {
                /* Move our buffer to the head of the LRU. */
-               if (list_empty (&obj_priv->gtt_lru_entry)) {
-                       drm_gem_object_reference (obj);
+               if (list_empty (&obj_priv->gtt_lru_entry))
                        list_add_tail(&obj_priv->gtt_lru_entry, &dev_priv->mm.gtt_lru);
-               else
+               else
                        list_move_tail(&obj_priv->gtt_lru_entry, &dev_priv->mm.gtt_lru);
 #if WATCH_LRU && 0
                i915_dump_lru (dev, __FUNCTION__);
@@ -536,16 +564,44 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
                }
 
                if (reloc.offset > obj->size - 4) {
-                       DRM_ERROR("Relocation beyond object bounds.\n");
+                       DRM_ERROR("Relocation beyond object bounds: obj %p target %d offset %d size %d.\n",
+                                 obj, reloc.target_handle, (int) reloc.offset, (int) obj->size);
                        drm_gem_object_unreference (target_obj);
                        return -EINVAL;
                }
                if (reloc.offset & 3) {
-                       DRM_ERROR("Relocation not 4-byte aligned.\n");
+                       DRM_ERROR("Relocation not 4-byte aligned: obj %p target %d offset %d.\n",
+                                 obj, reloc.target_handle, (int) reloc.offset);
                        drm_gem_object_unreference (target_obj);
                        return -EINVAL;
                }
 
+               if (reloc.write_domain && target_obj->pending_write_domain &&
+                   reloc.write_domain != target_obj->pending_write_domain)
+               {
+                       DRM_ERROR("Write domain conflict: obj %p target %d offset %d new %08x old %08x\n",
+                                 obj, reloc.target_handle, (int) reloc.offset,
+                                 reloc.write_domain, target_obj->pending_write_domain);
+                       drm_gem_object_unreference (target_obj);
+                       return -EINVAL;
+               }
+               
+#if WATCH_RELOC
+               DRM_INFO ("%s: obj %p offset %08x target %d read %08x write %08x gtt %08x presumed %08x delta %08x\n",
+                         __FUNCTION__,
+                         obj,
+                         (int) reloc.offset,
+                         (int) reloc.target_handle,
+                         (int) reloc.read_domains,
+                         (int) reloc.write_domain,
+                         (int) target_obj_priv->gtt_offset,
+                         (int) reloc.presumed_offset,
+                         reloc.delta);
+#endif
+                         
+               target_obj->pending_read_domains |= reloc.read_domains;
+               target_obj->pending_write_domain |= reloc.write_domain;
+
                /* If the relocation already has the right value in it, no
                 * more work needs to be done.
                 */
@@ -558,6 +614,16 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
                 */
                i915_gem_object_wait_rendering(obj);
 
+               /* As we're writing through the gtt, flush
+                * any CPU writes before we write the relocations
+                */
+               if (obj->write_domain & DRM_GEM_DOMAIN_CPU)
+               {
+                       i915_gem_clflush_object (obj);
+                       drm_agp_chipset_flush(dev);
+                       obj->write_domain = 0;
+               }
+
                /* Map the page containing the relocation we're going to
                 * perform.
                 */
@@ -672,6 +738,19 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
        LOCK_TEST_WITH_RETURN(dev, file_priv);
 
+#if 0
+       /*
+        * XXX wait for previous rendering to complete as we otherwise never
+        * flush the LRU list
+        */
+       {
+               drm_i915_private_t *dev_priv = dev->dev_private;
+               
+               while (!list_empty (&dev_priv->mm.gtt_lru))
+                       i915_gem_evict_something (dev);
+       }
+#endif
+
 #if WATCH_EXEC
        DRM_INFO ("buffers_ptr %d buffer_count %d len %08x\n",
                  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
@@ -717,6 +796,10 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                }
        }
 
+       /* Set the pending read domains for the batch buffer to COMMAND */
+       object_list[args->buffer_count-1]->pending_read_domains = DRM_GEM_DOMAIN_I915_COMMAND;
+       object_list[args->buffer_count-1]->pending_write_domain = 0;
+
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
                struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -730,22 +813,19 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                        ret = -ENOMEM;
                        goto err;
                }
-       }
 
-       if (dev->invalidate_domains | dev->flush_domains)
-       {
-#if WATCH_EXEC
-               DRM_INFO ("%s: invalidate_domains %08x flush_domains %08x\n",
-                         __FUNCTION__, dev->invalidate_domains, dev->flush_domains);
-#endif
-               i915_gem_flush (dev, dev->invalidate_domains | dev->flush_domains);
-               dev->invalidate_domains = 0;
-               dev->flush_domains = 0;
+               /* make sure all previous memory operations have passed */
+               i915_gem_object_set_domain (obj,
+                                           obj->pending_read_domains,
+                                           obj->pending_write_domain);
+               obj->pending_read_domains = 0;
+               obj->pending_write_domain = 0;
        }
 
-       exec_offset = validate_list[args->buffer_count - 1].buffer_offset;
+       /* Flush/invalidate caches and chipset buffer */
+       i915_gem_dev_set_domain (dev);
 
-       /* make sure all previous memory operations have passed */
+       exec_offset = validate_list[args->buffer_count - 1].buffer_offset;
 
 #if WATCH_EXEC
        i915_gem_dump_object (object_list[args->buffer_count - 1],
@@ -773,6 +853,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                struct drm_gem_object *obj = object_list[i];
                struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
+               /*
+                * Have the cookie hold a reference to this object
+                * which is freed when the object is waited for
+                */
+               if (obj_priv->last_rendering_cookie == 0)
+                       drm_gem_object_reference (obj);
                obj_priv->last_rendering_cookie = cookie;
        }
 
@@ -789,6 +875,13 @@ err:
                for (i = 0; i < args->buffer_count; i++)
                        drm_gem_object_unreference(object_list[i]);
        }
+
+       /* XXX kludge for now as we don't clean the exec ring yet */
+       if (object_list != NULL) {
+               for (i = 0; i < args->buffer_count; i++)
+                       i915_gem_object_wait_rendering (object_list[i]);
+       }
+
        drm_free(object_list, sizeof(*object_list) * args->buffer_count,
                 DRM_MEM_DRIVER);
        drm_free(validate_list, sizeof(*validate_list) * args->buffer_count,
@@ -873,3 +966,13 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
        drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
+
+int
+i915_gem_set_domain_ioctl (struct drm_gem_object *obj,
+                          uint32_t read_domains,
+                          uint32_t write_domain)
+{
+       i915_gem_object_set_domain (obj, read_domains, write_domain);
+       i915_gem_dev_set_domain (obj->dev);
+       return 0;
+}
index 8c3cd64..d71447e 100644 (file)
@@ -443,6 +443,20 @@ struct drm_i915_gem_relocation_entry {
         * the execbuffer ioctl when the relocation is written.
         */
        uint64_t presumed_offset;
+
+       /**
+        * Target memory domains read by this operation.
+        */
+       uint32_t read_domains;
+
+       /**
+        * Target memory domains written by this operation.
+        *
+        * Note that only one domain may be written by the whole
+        * execbuffer operation, so that where there are conflicts,
+        * the application will get -EINVAL back.
+        */
+       uint32_t write_domain;
 };
 
 /**
@@ -451,13 +465,6 @@ struct drm_i915_gem_relocation_entry {
  * Most of these just align with the various caches in
  * the system and are used to flush and invalidate as
  * objects end up cached in different domains.
- *
- * STOLEN is a domain for the stolen memory portion of the
- * address space; those pages are accessible only through the
- * GTT and, hence, look a lot like VRAM on a discrete card.
- * We'll allow programs to move objects into stolen memory
- * mostly as a way to demonstrate the VRAM capabilities of this
- * API
  */
 
 /* 0x00000001 is DRM_GEM_DOMAIN_CPU */
@@ -465,8 +472,7 @@ struct drm_i915_gem_relocation_entry {
 #define DRM_GEM_DOMAIN_I915_SAMPLER    0x00000004      /* Sampler cache, used by texture engine */
 #define DRM_GEM_DOMAIN_I915_COMMAND    0x00000008      /* Command queue, used to load batch buffers */
 #define DRM_GEM_DOMAIN_I915_INSTRUCTION        0x00000010      /* Instruction cache, used by shader programs */
-#define DRM_GEM_DOMAIN_I915_STOLEN     0x00000020      /* Stolen memory, needed by some objects */
-#define DRM_GEM_DOMAIN_I915_VERTEX     0x00000040      /* Vertex address cache */
+#define DRM_GEM_DOMAIN_I915_VERTEX     0x00000020      /* Vertex address cache */
 
 struct drm_i915_gem_validate_entry {
        /**
@@ -482,10 +488,6 @@ struct drm_i915_gem_validate_entry {
        /** Required alignment in graphics aperture */
        uint64_t alignment;
 
-       /** Memory domains used in this execbuffer run */
-       uint32_t read_domains;
-       uint32_t write_domain;
-
        /**
         * Returned value of the updated offset of the buffer, for future
         * presumed_offset writes.
index daa77f7..96257ab 100644 (file)
@@ -383,6 +383,9 @@ int i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file_priv);
 int i915_gem_init_object(struct drm_gem_object *obj);
 void i915_gem_free_object(struct drm_gem_object *obj);
+int i915_gem_set_domain_ioctl (struct drm_gem_object *obj,
+                              uint32_t read_domains,
+                              uint32_t write_domain);
 
 #endif