intel-gem: Speed up tiled readpixels by tracking which pages have been flushed.
authorEric Anholt <eric@anholt.net>
Sun, 27 Jul 2008 02:28:14 +0000 (19:28 -0700)
committerEric Anholt <eric@anholt.net>
Mon, 28 Jul 2008 18:25:19 +0000 (11:25 -0700)
This is around a 3x speedup, since we would read wide rows at a time, and
clflush each tile 8 times as a result.  We'll want code related to this anyway
when we do fault-based per-page clflushing for sw fallbacks.

linux-core/i915_gem.c
shared-core/i915_drv.h

index 4087854..eea2d48 100644 (file)
@@ -36,6 +36,12 @@ static int
 i915_gem_object_set_domain(struct drm_gem_object *obj,
                            uint32_t read_domains,
                            uint32_t write_domain);
+static int
+i915_gem_object_set_domain_range(struct drm_gem_object *obj,
+                                uint64_t offset,
+                                uint64_t size,
+                                uint32_t read_domains,
+                                uint32_t write_domain);
 int
 i915_gem_set_domain(struct drm_gem_object *obj,
                    struct drm_file *file_priv,
@@ -136,32 +142,11 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
        mutex_lock(&dev->struct_mutex);
 
-       /* Do a partial equivalent of i915_gem_set_domain(CPU, 0), as
-        * we don't want to clflush whole objects to read a portion of them.
-        *
-        * The side effect of doing this is that repeated preads of the same
-        * contents would take extra clflush overhead, since we don't track
-        * flushedness on a page basis.
-        */
-       if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
-               ret = i915_gem_object_wait_rendering(obj);
-               if (ret) {
-                       drm_gem_object_unreference(obj);
-                       mutex_unlock(&dev->struct_mutex);
-                       return ret;
-               }
-       }
-       if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-               int first_page = args->offset / PAGE_SIZE;
-               int last_page = (args->offset + args->size - 1) / PAGE_SIZE;
-
-               /* If we don't have the page list, the pages are unpinned
-                * and swappable, and thus should already be in the CPU domain.
-                */
-               BUG_ON(obj_priv->page_list == NULL);
-
-               drm_ttm_cache_flush(&obj_priv->page_list[first_page],
-                                   last_page - first_page + 1);
+       ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
+                                              I915_GEM_DOMAIN_CPU, 0);
+       if (ret != 0) {
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+               /* Bail out: without this return we would fall through and
+                * keep using obj with its reference dropped and the mutex
+                * released.
+                */
+               return ret;
        }
 
        offset = args->offset;
@@ -1383,7 +1368,17 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
 
        if ((write_domain | flush_domains) != 0)
                obj->write_domain = write_domain;
+
+       /* If we're invalidating the CPU domain, clear the per-page CPU
+        * domain list as well.
+        */
+       if (obj_priv->page_cpu_valid != NULL &&
+           (obj->read_domains & I915_GEM_DOMAIN_CPU) &&
+           ((read_domains & I915_GEM_DOMAIN_CPU) == 0)) {
+               memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
+       }
        obj->read_domains = read_domains;
+
        dev->invalidate_domains |= invalidate_domains;
        dev->flush_domains |= flush_domains;
 #if WATCH_BUF
@@ -1396,6 +1391,57 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
 }
 
 /**
+ * Set the read/write domain on a range of the object.
+ *
+ * Currently only implemented for CPU reads, otherwise drops to normal
+ * i915_gem_object_set_domain().
+ */
+static int
+i915_gem_object_set_domain_range(struct drm_gem_object *obj,
+                                uint64_t offset,
+                                uint64_t size,
+                                uint32_t read_domains,
+                                uint32_t write_domain)
+{
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       int ret, i;
+
+       /* If the whole object is already in the CPU read domain, every
+        * page is coherent and there is nothing to flush.
+        */
+       if (obj->read_domains & I915_GEM_DOMAIN_CPU)
+               return 0;
+
+       /* Only CPU reads are tracked per-page; any other transition falls
+        * back to the whole-object path.
+        */
+       if (read_domains != I915_GEM_DOMAIN_CPU ||
+           write_domain != 0)
+               return i915_gem_object_set_domain(obj,
+                                                 read_domains, write_domain);
+
+       /* Wait on any GPU rendering to the object to be flushed. */
+       if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
+               ret = i915_gem_object_wait_rendering(obj);
+               if (ret)
+                       return ret;
+       }
+
+       /* Lazily allocate the per-page flush-tracking bitmap (one byte per
+        * page of the object).
+        */
+       if (obj_priv->page_cpu_valid == NULL) {
+               obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
+                                                     DRM_MEM_DRIVER);
+               if (obj_priv->page_cpu_valid == NULL)
+                       return -ENOMEM;
+       }
+
+       /* Flush the cache on any pages that are still invalid from the CPU's
+        * perspective.  Note <=: the page containing the last byte of the
+        * range must be flushed as well.
+        */
+       for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
+               if (obj_priv->page_cpu_valid[i])
+                       continue;
+
+               drm_ttm_cache_flush(obj_priv->page_list + i, 1);
+
+               obj_priv->page_cpu_valid[i] = 1;
+       }
+
+       return 0;
+}
+
+/**
  * Once all of the objects have been set in the proper domain,
  * perform the necessary flush and invalidate operations.
  *
@@ -2097,6 +2143,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
        i915_gem_object_unbind(obj);
 
+       drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
        drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
 
index 99416e8..7bb9e5b 100644 (file)
@@ -393,6 +393,12 @@ struct drm_i915_gem_object {
 
        /** Current tiling mode for the object. */
        uint32_t tiling_mode;
+
+       /**
+        * Flagging of which individual pages are valid in GEM_DOMAIN_CPU when
+        * GEM_DOMAIN_CPU is not in the object's read domain.
+        */
+       uint8_t *page_cpu_valid;
 };
 
 /**