drm/i915: Replace the pending_gpu_write flag with an explicit seqno
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 20 Jul 2012 11:41:01 +0000 (12:41 +0100)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Wed, 25 Jul 2012 16:23:52 +0000 (18:23 +0200)
As we always flush the GPU cache prior to emitting the breadcrumb, we no
longer have to worry about the deferred flush causing the
pending_gpu_write to be delayed. So we can instead utilize the known
last_write_seqno to hopefully minimise the wait times.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_irq.c

index 359f6e8..a8b7db6 100644 (file)
@@ -121,14 +121,15 @@ static const char *cache_level_str(int type)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
-       seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s",
+       seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s",
                   &obj->base,
                   get_pin_flag(obj),
                   get_tiling_flag(obj),
                   obj->base.size / 1024,
                   obj->base.read_domains,
                   obj->base.write_domain,
-                  obj->last_rendering_seqno,
+                  obj->last_read_seqno,
+                  obj->last_write_seqno,
                   obj->last_fenced_seqno,
                   cache_level_str(obj->cache_level),
                   obj->dirty ? " dirty" : "",
@@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m,
        seq_printf(m, "%s [%d]:\n", name, count);
 
        while (count--) {
-               seq_printf(m, "  %08x %8u %04x %04x %08x%s%s%s%s%s%s%s",
+               seq_printf(m, "  %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s",
                           err->gtt_offset,
                           err->size,
                           err->read_domains,
                           err->write_domain,
-                          err->seqno,
+                          err->rseqno, err->wseqno,
                           pin_flag(err->pinned),
                           tiling_flag(err->tiling),
                           dirty_flag(err->dirty),
index 1f5f5ff..49a532e 100644 (file)
@@ -221,7 +221,7 @@ struct drm_i915_error_state {
        struct drm_i915_error_buffer {
                u32 size;
                u32 name;
-               u32 seqno;
+               u32 rseqno, wseqno;
                u32 gtt_offset;
                u32 read_domains;
                u32 write_domain;
@@ -895,12 +895,6 @@ struct drm_i915_gem_object {
        unsigned int dirty:1;
 
        /**
-        * This is set if the object has been written to since the last
-        * GPU flush.
-        */
-       unsigned int pending_gpu_write:1;
-
-       /**
         * Fence register bits (if any) for this object.  Will be set
         * as needed when mapped into the GTT.
         * Protected by dev->struct_mutex.
@@ -992,7 +986,8 @@ struct drm_i915_gem_object {
        struct intel_ring_buffer *ring;
 
        /** Breadcrumb of last rendering to the buffer. */
-       uint32_t last_rendering_seqno;
+       uint32_t last_read_seqno;
+       uint32_t last_write_seqno;
        /** Breadcrumb of last fenced GPU access to the buffer. */
        uint32_t last_fenced_seqno;
 
@@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev);
 int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
                                  gfp_t gfpmask);
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
                         struct intel_ring_buffer *to);
 void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
index 3bef7e6..6a80d65 100644 (file)
@@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
        list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
        list_move_tail(&obj->ring_list, &ring->active_list);
 
-       obj->last_rendering_seqno = seqno;
+       obj->last_read_seqno = seqno;
 
        if (obj->fenced_gpu_access) {
                obj->last_fenced_seqno = seqno;
@@ -1461,7 +1461,8 @@ static void
 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
 {
        list_del_init(&obj->ring_list);
-       obj->last_rendering_seqno = 0;
+       obj->last_read_seqno = 0;
+       obj->last_write_seqno = 0;
        obj->last_fenced_seqno = 0;
 }
 
@@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
        obj->fenced_gpu_access = false;
 
        obj->active = 0;
-       obj->pending_gpu_write = false;
        drm_gem_object_unreference(&obj->base);
 
        WARN_ON(i915_verify_lists(dev));
@@ -1812,7 +1812,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
                                      struct drm_i915_gem_object,
                                      ring_list);
 
-               if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
+               if (!i915_seqno_passed(seqno, obj->last_read_seqno))
                        break;
 
                if (obj->base.write_domain != 0)
@@ -2036,9 +2036,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
  */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
+static __must_check int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+                              bool readonly)
 {
+       u32 seqno;
        int ret;
 
        /* This function only exists to support waiting for existing rendering,
@@ -2049,13 +2051,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
        /* If there is rendering queued on the buffer being evicted, wait for
         * it.
         */
-       if (obj->active) {
-               ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno);
-               if (ret)
-                       return ret;
-               i915_gem_retire_requests_ring(obj->ring);
+       if (readonly)
+               seqno = obj->last_write_seqno;
+       else
+               seqno = obj->last_read_seqno;
+       if (seqno == 0)
+               return 0;
+
+       ret = i915_wait_seqno(obj->ring, seqno);
+       if (ret)
+               return ret;
+
+       /* Manually manage the write flush as we may have not yet retired
+        * the buffer.
+        */
+       if (obj->last_write_seqno &&
+           i915_seqno_passed(seqno, obj->last_write_seqno)) {
+               obj->last_write_seqno = 0;
+               obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
        }
 
+       i915_gem_retire_requests_ring(obj->ring);
        return 0;
 }
 
@@ -2074,10 +2090,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
                if (ret)
                        return ret;
 
-               ret = i915_gem_check_olr(obj->ring,
-                                        obj->last_rendering_seqno);
+               ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
                if (ret)
                        return ret;
+
                i915_gem_retire_requests_ring(obj->ring);
        }
 
@@ -2137,7 +2153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                goto out;
 
        if (obj->active) {
-               seqno = obj->last_rendering_seqno;
+               seqno = obj->last_read_seqno;
                ring = obj->ring;
        }
 
@@ -2192,11 +2208,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
                return 0;
 
        if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-               return i915_gem_object_wait_rendering(obj);
+               return i915_gem_object_wait_rendering(obj, false);
 
        idx = intel_ring_sync_index(from, to);
 
-       seqno = obj->last_rendering_seqno;
+       seqno = obj->last_read_seqno;
        if (seqno <= from->sync_seqno[idx])
                return 0;
 
@@ -2940,11 +2956,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       if (obj->pending_gpu_write || write) {
-               ret = i915_gem_object_wait_rendering(obj);
-               if (ret)
-                       return ret;
-       }
+       ret = i915_gem_object_wait_rendering(obj, !write);
+       if (ret)
+               return ret;
 
        i915_gem_object_flush_cpu_write_domain(obj);
 
@@ -3115,7 +3129,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
                        return ret;
        }
 
-       ret = i915_gem_object_wait_rendering(obj);
+       ret = i915_gem_object_wait_rendering(obj, false);
        if (ret)
                return ret;
 
@@ -3143,11 +3157,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       if (write || obj->pending_gpu_write) {
-               ret = i915_gem_object_wait_rendering(obj);
-               if (ret)
-                       return ret;
-       }
+       ret = i915_gem_object_wait_rendering(obj, !write);
+       if (ret)
+               return ret;
 
        i915_gem_object_flush_gtt_write_domain(obj);
 
index f94ec57..2353e6e 100644 (file)
@@ -954,7 +954,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
                i915_gem_object_move_to_active(obj, ring, seqno);
                if (obj->base.write_domain) {
                        obj->dirty = 1;
-                       obj->pending_gpu_write = true;
+                       obj->last_write_seqno = seqno;
                        list_move_tail(&obj->gpu_write_list,
                                       &ring->gpu_write_list);
                        if (obj->pin_count) /* check for potential scanout */
index 566f61b..41ed41d 100644 (file)
@@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 {
        err->size = obj->base.size;
        err->name = obj->base.name;
-       err->seqno = obj->last_rendering_seqno;
+       err->rseqno = obj->last_read_seqno;
+       err->wseqno = obj->last_write_seqno;
        err->gtt_offset = obj->gtt_offset;
        err->read_domains = obj->base.read_domains;
        err->write_domain = obj->base.write_domain;
@@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
                if (obj->ring != ring)
                        continue;
 
-               if (i915_seqno_passed(seqno, obj->last_rendering_seqno))
+               if (i915_seqno_passed(seqno, obj->last_read_seqno))
                        continue;
 
                if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0)