drm/i915: Protect request retirement with timeline->mutex
Author:     Chris Wilson <chris@chris-wilson.co.uk>
AuthorDate: Thu, 15 Aug 2019 20:57:09 +0000 (21:57 +0100)
Commit:     Chris Wilson <chris@chris-wilson.co.uk>
CommitDate: Thu, 15 Aug 2019 22:21:13 +0000 (23:21 +0100)
Forgo the struct_mutex requirement for request retirement, as we have
been transitioning to using only the timeline->mutex to control the
lifetime of a request on that timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190815205709.24285-4-chris@chris-wilson.co.uk
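
To illustrate the locking model the series is moving to, below is a small,
self-contained userspace sketch (illustrative only; every struct and function
name in it is invented for the example and none of it is the driver's code):
requests live on a per-timeline list guarded by that timeline's mutex, and the
global retire path trylocks each active timeline instead of serialising all
retirement on struct_mutex.

/*
 * Userspace model of the per-timeline retirement scheme. Not driver
 * code; the structures only mirror the shape of the real ones.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct request {
	unsigned int seqno;
	bool completed;
	struct request *next;
};

struct timeline {
	pthread_mutex_t mutex;		/* plays the role of timeline->mutex */
	struct request *requests;	/* oldest first */
	struct timeline *next;
};

struct gt {
	pthread_mutex_t lock;		/* guards the active-timeline list */
	struct timeline *active;
};

/* Retire completed requests in submission order; caller holds tl->mutex. */
static void retire_requests(struct timeline *tl)
{
	while (tl->requests && tl->requests->completed) {
		struct request *rq = tl->requests;

		tl->requests = rq->next;
		printf("retired request %u\n", rq->seqno);
		free(rq);
	}
}

/*
 * Walk the active timelines and retire what we can. A timeline whose
 * mutex is contended is simply skipped. (The real driver additionally
 * pins the list element and drops the list lock while retiring; this
 * simplified model keeps the list lock held throughout.)
 */
static void gt_retire_requests(struct gt *gt)
{
	struct timeline *tl;

	pthread_mutex_lock(&gt->lock);
	for (tl = gt->active; tl; tl = tl->next) {
		if (pthread_mutex_trylock(&tl->mutex))
			continue;

		retire_requests(tl);
		pthread_mutex_unlock(&tl->mutex);
	}
	pthread_mutex_unlock(&gt->lock);
}

int main(void)
{
	struct timeline tl = { .mutex = PTHREAD_MUTEX_INITIALIZER };
	struct gt gt = { .lock = PTHREAD_MUTEX_INITIALIZER, .active = &tl };
	struct request *rq = calloc(1, sizeof(*rq));

	rq->seqno = 1;
	rq->completed = true;
	tl.requests = rq;

	gt_retire_requests(&gt);	/* prints "retired request 1" */
	return 0;
}

The trylock is the key design point: retirement never stalls behind a busy
timeline, which is what allows the execbuffer path in this patch to pin its
engine and throttle on the ring without holding struct_mutex.
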
12 files changed:
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gt/intel_context.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt_types.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_ringbuffer.c
drivers/gpu/drm/i915/gt/mock_engine.c
drivers/gpu/drm/i915/gt/selftest_context.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1bd2187..77a201b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -735,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
        return 0;
 }
 
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-       struct i915_request *rq;
-
-       /*
-        * Completely unscientific finger-in-the-air estimates for suitable
-        * maximum user request size (to avoid blocking) and then backoff.
-        */
-       if (intel_ring_update_space(ring) >= PAGE_SIZE)
-               return NULL;
-
-       /*
-        * Find a request that after waiting upon, there will be at least half
-        * the ring available. The hysteresis allows us to compete for the
-        * shared ring and should mean that we sleep less often prior to
-        * claiming our resources, but not so long that the ring completely
-        * drains before we can submit our next request.
-        */
-       list_for_each_entry(rq, &ring->request_list, ring_link) {
-               if (__intel_ring_space(rq->postfix,
-                                      ring->emit, ring->size) > ring->size / 2)
-                       break;
-       }
-       if (&rq->ring_link == &ring->request_list)
-               return NULL; /* weird, we will check again later for real */
-
-       return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-       struct i915_request *rq;
-       int ret = 0;
-
-       /*
-        * Apply a light amount of backpressure to prevent excessive hogs
-        * from blocking waiting for space whilst holding struct_mutex and
-        * keeping all of their resources pinned.
-        */
-
-       rq = __eb_wait_for_ring(eb->context->ring);
-       if (rq) {
-               mutex_unlock(&eb->i915->drm.struct_mutex);
-
-               if (i915_request_wait(rq,
-                                     I915_WAIT_INTERRUPTIBLE,
-                                     MAX_SCHEDULE_TIMEOUT) < 0)
-                       ret = -EINTR;
-
-               i915_request_put(rq);
-
-               mutex_lock(&eb->i915->drm.struct_mutex);
-       }
-
-       return ret;
-}
-
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
        struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2134,10 +2077,75 @@ static const enum intel_engine_id user_ring_map[] = {
        [I915_EXEC_VEBOX]       = VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+       struct intel_ring *ring = ce->ring;
+       struct intel_timeline *tl = ce->timeline;
+       struct i915_request *rq;
+
+       /*
+        * Completely unscientific finger-in-the-air estimates for suitable
+        * maximum user request size (to avoid blocking) and then backoff.
+        */
+       if (intel_ring_update_space(ring) >= PAGE_SIZE)
+               return NULL;
+
+       /*
+        * Find a request that after waiting upon, there will be at least half
+        * the ring available. The hysteresis allows us to compete for the
+        * shared ring and should mean that we sleep less often prior to
+        * claiming our resources, but not so long that the ring completely
+        * drains before we can submit our next request.
+        */
+       list_for_each_entry(rq, &tl->requests, link) {
+               if (rq->ring != ring)
+                       continue;
+
+               if (__intel_ring_space(rq->postfix,
+                                      ring->emit, ring->size) > ring->size / 2)
+                       break;
+       }
+       if (&rq->link == &tl->requests)
+               return NULL; /* weird, we will check again later for real */
+
+       return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
        int err;
 
+       if (likely(atomic_inc_not_zero(&ce->pin_count)))
+               return 0;
+
+       err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+       if (err)
+               return err;
+
+       err = __intel_context_do_pin(ce);
+       mutex_unlock(&eb->i915->drm.struct_mutex);
+
+       return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+       if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+               return;
+
+       mutex_lock(&eb->i915->drm.struct_mutex);
+       intel_context_unpin(ce);
+       mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+       struct intel_timeline *tl;
+       struct i915_request *rq;
+       int err;
+
        /*
         * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
         * EIO if the GPU is already wedged.
@@ -2151,7 +2159,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
         * GGTT space, so do this first before we reserve a seqno for
         * ourselves.
         */
-       err = intel_context_pin(ce);
+       err = __eb_pin_context(eb, ce);
        if (err)
                return err;
 
@@ -2163,23 +2171,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
         * until the timeline is idle, which in turn releases the wakeref
         * taken on the engine, and the parent device.
         */
-       err = intel_context_timeline_lock(ce);
-       if (err)
+       tl = intel_context_timeline_lock(ce);
+       if (IS_ERR(tl)) {
+               err = PTR_ERR(tl);
                goto err_unpin;
+       }
 
        intel_context_enter(ce);
-       intel_context_timeline_unlock(ce);
+       rq = eb_throttle(ce);
+
+       intel_context_timeline_unlock(tl);
+
+       if (rq) {
+               if (i915_request_wait(rq,
+                                     I915_WAIT_INTERRUPTIBLE,
+                                     MAX_SCHEDULE_TIMEOUT) < 0) {
+                       i915_request_put(rq);
+                       err = -EINTR;
+                       goto err_exit;
+               }
+
+               i915_request_put(rq);
+       }
 
        eb->engine = ce->engine;
        eb->context = ce;
        return 0;
 
+err_exit:
+       mutex_lock(&tl->mutex);
+       intel_context_exit(ce);
+       intel_context_timeline_unlock(tl);
 err_unpin:
-       intel_context_unpin(ce);
+       __eb_unpin_context(eb, ce);
        return err;
 }
 
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
 {
        struct intel_context *ce = eb->context;
        struct intel_timeline *tl = ce->timeline;
@@ -2188,7 +2216,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
        intel_context_exit(ce);
        mutex_unlock(&tl->mutex);
 
-       intel_context_unpin(ce);
+       __eb_unpin_context(eb, ce);
 }
 
 static unsigned int
@@ -2233,9 +2261,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_select_engine(struct i915_execbuffer *eb,
-                struct drm_file *file,
-                struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+             struct drm_file *file,
+             struct drm_i915_gem_execbuffer2 *args)
 {
        struct intel_context *ce;
        unsigned int idx;
@@ -2250,7 +2278,7 @@ eb_select_engine(struct i915_execbuffer *eb,
        if (IS_ERR(ce))
                return PTR_ERR(ce);
 
-       err = eb_pin_context(eb, ce);
+       err = __eb_pin_engine(eb, ce);
        intel_context_put(ce);
 
        return err;
@@ -2468,16 +2496,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (unlikely(err))
                goto err_destroy;
 
-       err = i915_mutex_lock_interruptible(dev);
-       if (err)
-               goto err_context;
-
-       err = eb_select_engine(&eb, file, args);
+       err = eb_pin_engine(&eb, file, args);
        if (unlikely(err))
-               goto err_unlock;
+               goto err_context;
 
-       err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
-       if (unlikely(err))
+       err = i915_mutex_lock_interruptible(dev);
+       if (err)
                goto err_engine;
 
        err = eb_relocate(&eb);
@@ -2635,10 +2659,9 @@ err_batch_unpin:
 err_vma:
        if (eb.exec)
                eb_release_vmas(&eb);
-err_engine:
-       eb_unpin_context(&eb);
-err_unlock:
        mutex_unlock(&dev->struct_mutex);
+err_engine:
+       eb_unpin_engine(&eb);
 err_context:
        i915_gem_context_put(eb.gem_context);
 err_destroy:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 9fa8b58..053a130 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
                        struct i915_gem_context *ctx,
@@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce)
        kref_put(&ce->ref, ce->ops->destroy);
 }
 
-static inline int __must_check
+static inline struct intel_timeline *__must_check
 intel_context_timeline_lock(struct intel_context *ce)
        __acquires(&ce->timeline->mutex)
 {
-       return mutex_lock_interruptible(&ce->timeline->mutex);
+       struct intel_timeline *tl = ce->timeline;
+       int err;
+
+       err = mutex_lock_interruptible(&tl->mutex);
+       if (err)
+               return ERR_PTR(err);
+
+       return tl;
 }
 
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
-       __releases(&ce->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+       __releases(&tl->mutex)
 {
-       mutex_unlock(&ce->timeline->mutex);
+       mutex_unlock(&tl->mutex);
 }
 
 int intel_context_prepare_remote_request(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d20750e..957f27a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -679,7 +679,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
                                engine->status_page.vma))
                goto out_frame;
 
-       INIT_LIST_HEAD(&frame->ring.request_list);
        frame->ring.vaddr = frame->cs;
        frame->ring.size = sizeof(frame->cs);
        frame->ring.effective_size = frame->ring.size;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a0f3728..9965a32 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -69,9 +69,6 @@ struct intel_ring {
        struct i915_vma *vma;
        void *vaddr;
 
-       struct list_head request_list;
-       struct list_head active_link;
-
        /*
         * As we have two types of rings, one global to the engine used
         * by ringbuffer submission and those that are exclusive to a
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 914bd2d..d48ec9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -15,8 +15,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
        spin_lock_init(&gt->irq_lock);
 
-       INIT_LIST_HEAD(&gt->active_rings);
-
        INIT_LIST_HEAD(&gt->closed_vma);
        spin_lock_init(&gt->closed_lock);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 143b2d7..f882e2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -48,8 +48,6 @@ struct intel_gt {
                struct list_head hwsp_free_list;
        } timelines;
 
-       struct list_head active_rings;
-
        struct intel_wakeref wakeref;
 
        struct list_head closed_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b4c2662..e9863f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1583,6 +1583,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
        i915_gem_context_unpin_hw_id(ce->gem_context);
        i915_gem_object_unpin_map(ce->state->obj);
+       intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 409d764..1d9c125 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1250,7 +1250,7 @@ void intel_ring_unpin(struct intel_ring *ring)
                return;
 
        /* Discard any unused bytes beyond that submitted to hw. */
-       intel_ring_reset(ring, ring->tail);
+       intel_ring_reset(ring, ring->emit);
 
        i915_vma_unset_ggtt_write(vma);
        if (i915_vma_is_map_and_fenceable(vma))
@@ -1311,7 +1311,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
                return ERR_PTR(-ENOMEM);
 
        kref_init(&ring->ref);
-       INIT_LIST_HEAD(&ring->request_list);
 
        ring->size = size;
        /* Workaround an erratum on the i830 which causes a hang if
@@ -1865,7 +1864,10 @@ static int ring_request_alloc(struct i915_request *request)
        return 0;
 }
 
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
+static noinline int
+wait_for_space(struct intel_ring *ring,
+              struct intel_timeline *tl,
+              unsigned int bytes)
 {
        struct i915_request *target;
        long timeout;
@@ -1873,15 +1875,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
        if (intel_ring_update_space(ring) >= bytes)
                return 0;
 
-       GEM_BUG_ON(list_empty(&ring->request_list));
-       list_for_each_entry(target, &ring->request_list, ring_link) {
+       GEM_BUG_ON(list_empty(&tl->requests));
+       list_for_each_entry(target, &tl->requests, link) {
+               if (target->ring != ring)
+                       continue;
+
                /* Would completion of this request free enough space? */
                if (bytes <= __intel_ring_space(target->postfix,
                                                ring->emit, ring->size))
                        break;
        }
 
-       if (WARN_ON(&target->ring_link == &ring->request_list))
+       if (GEM_WARN_ON(&target->link == &tl->requests))
                return -ENOSPC;
 
        timeout = i915_request_wait(target,
@@ -1948,7 +1953,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
                 */
                GEM_BUG_ON(!rq->reserved_space);
 
-               ret = wait_for_space(ring, total_bytes);
+               ret = wait_for_space(ring, rq->timeline, total_bytes);
                if (unlikely(ret))
                        return ERR_PTR(ret);
        }
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 54a11dd..5d43cbc 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -58,7 +58,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
        ring->vaddr = (void *)(ring + 1);
        atomic_set(&ring->pin_count, 1);
 
-       INIT_LIST_HEAD(&ring->request_list);
        intel_ring_update_space(ring);
 
        return ring;
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index da9c49e..6fbc72b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -20,10 +20,13 @@ static int request_sync(struct i915_request *rq)
 
        i915_request_add(rq);
        timeout = i915_request_wait(rq, 0, HZ / 10);
-       if (timeout < 0)
+       if (timeout < 0) {
                err = timeout;
-       else
+       } else {
+               mutex_lock(&rq->timeline->mutex);
                i915_request_retire_upto(rq);
+               mutex_unlock(&rq->timeline->mutex);
+       }
 
        i915_request_put(rq);
 
@@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce)
        struct intel_timeline *tl = ce->timeline;
        int err = 0;
 
+       mutex_lock(&tl->mutex);
        do {
                struct i915_request *rq;
                long timeout;
@@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce)
 
                i915_request_put(rq);
        } while (!err);
+       mutex_unlock(&tl->mutex);
 
        return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2b7645f..7cefff8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -181,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request)
        spin_unlock(&file_priv->mm.lock);
 }
 
-static void advance_ring(struct i915_request *request)
-{
-       struct intel_ring *ring = request->ring;
-       unsigned int tail;
-
-       /*
-        * We know the GPU must have read the request to have
-        * sent us the seqno + interrupt, so use the position
-        * of tail of the request to update the last known position
-        * of the GPU head.
-        *
-        * Note this requires that we are always called in request
-        * completion order.
-        */
-       GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-       if (list_is_last(&request->ring_link, &ring->request_list)) {
-               /*
-                * We may race here with execlists resubmitting this request
-                * as we retire it. The resubmission will move the ring->tail
-                * forwards (to request->wa_tail). We either read the
-                * current value that was written to hw, or the value that
-                * is just about to be. Either works, if we miss the last two
-                * noops - they are safe to be replayed on a reset.
-                */
-               tail = READ_ONCE(request->tail);
-               list_del(&ring->active_link);
-       } else {
-               tail = request->postfix;
-       }
-       list_del_init(&request->ring_link);
-
-       ring->head = tail;
-}
-
 static void free_capture_list(struct i915_request *request)
 {
        struct i915_capture_list *capture;
@@ -232,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq)
 {
        struct i915_active_request *active, *next;
 
-       lockdep_assert_held(&rq->i915->drm.struct_mutex);
+       lockdep_assert_held(&rq->timeline->mutex);
        if (!i915_request_completed(rq))
                return false;
 
@@ -244,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq)
        GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
        trace_i915_request_retire(rq);
 
-       advance_ring(rq);
+       /*
+        * We know the GPU must have read the request to have
+        * sent us the seqno + interrupt, so use the position
+        * of tail of the request to update the last known position
+        * of the GPU head.
+        *
+        * Note this requires that we are always called in request
+        * completion order.
+        */
+       GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+       rq->ring->head = rq->postfix;
 
        /*
         * Walk through the active list, calling retire on each. This allows
@@ -321,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-       struct intel_ring *ring = rq->ring;
+       struct intel_timeline * const tl = rq->timeline;
        struct i915_request *tmp;
 
        GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -329,15 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq)
                  rq->fence.context, rq->fence.seqno,
                  hwsp_seqno(rq));
 
-       lockdep_assert_held(&rq->i915->drm.struct_mutex);
+       lockdep_assert_held(&tl->mutex);
        GEM_BUG_ON(!i915_request_completed(rq));
 
-       if (list_empty(&rq->ring_link))
-               return;
-
        do {
-               tmp = list_first_entry(&ring->request_list,
-                                      typeof(*tmp), ring_link);
+               tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
        } while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -564,29 +536,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
        return NOTIFY_DONE;
 }
 
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
 {
        struct i915_request *rq, *rn;
 
-       list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+       list_for_each_entry_safe(rq, rn, &tl->requests, link)
                if (!i915_request_retire(rq))
                        break;
 }
 
 static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 {
-       struct intel_ring *ring = ce->ring;
        struct i915_request *rq;
 
-       if (list_empty(&ring->request_list))
+       if (list_empty(&tl->requests))
                goto out;
 
        if (!gfpflags_allow_blocking(gfp))
                goto out;
 
        /* Move our oldest request to the slab-cache (if not in use!) */
-       rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+       rq = list_first_entry(&tl->requests, typeof(*rq), link);
        i915_request_retire(rq);
 
        rq = kmem_cache_alloc(global.slab_requests,
@@ -595,11 +566,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
                return rq;
 
        /* Ratelimit ourselves to prevent oom from malicious clients */
-       rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+       rq = list_last_entry(&tl->requests, typeof(*rq), link);
        cond_synchronize_rcu(rq->rcustate);
 
        /* Retire our old requests in the hope that we free some */
-       ring_retire_requests(ring);
+       retire_requests(tl);
 
 out:
        return kmem_cache_alloc(global.slab_requests, gfp);
@@ -650,7 +621,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
        rq = kmem_cache_alloc(global.slab_requests,
                              gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
        if (unlikely(!rq)) {
-               rq = request_alloc_slow(ce, gfp);
+               rq = request_alloc_slow(tl, gfp);
                if (!rq) {
                        ret = -ENOMEM;
                        goto err_unreserve;
@@ -742,15 +713,15 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
        struct i915_request *rq;
-       int err;
+       struct intel_timeline *tl;
 
-       err = intel_context_timeline_lock(ce);
-       if (err)
-               return ERR_PTR(err);
+       tl = intel_context_timeline_lock(ce);
+       if (IS_ERR(tl))
+               return ERR_CAST(tl);
 
        /* Move our oldest request to the slab-cache (if not in use!) */
-       rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-       if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+       rq = list_first_entry(&tl->requests, typeof(*rq), link);
+       if (!list_is_last(&rq->link, &tl->requests))
                i915_request_retire(rq);
 
        intel_context_enter(ce);
@@ -760,22 +731,22 @@ i915_request_create(struct intel_context *ce)
                goto err_unlock;
 
        /* Check that we do not interrupt ourselves with a new request */
-       rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+       rq->cookie = lockdep_pin_lock(&tl->mutex);
 
        return rq;
 
 err_unlock:
-       intel_context_timeline_unlock(ce);
+       intel_context_timeline_unlock(tl);
        return rq;
 }
 
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-       if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+       if (list_is_first(&signal->link, &signal->timeline->requests))
                return 0;
 
-       signal = list_prev_entry(signal, ring_link);
+       signal = list_prev_entry(signal, link);
        if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
                return 0;
 
@@ -1155,7 +1126,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
        struct intel_engine_cs *engine = rq->engine;
        struct intel_ring *ring = rq->ring;
-       struct i915_request *prev;
        u32 *cs;
 
        GEM_TRACE("%s fence %llx:%lld\n",
@@ -1168,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
         */
        GEM_BUG_ON(rq->reserved_space > ring->space);
        rq->reserved_space = 0;
+       rq->emitted_jiffies = jiffies;
 
        /*
         * Record the position of the start of the breadcrumb so that
@@ -1179,14 +1150,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
        GEM_BUG_ON(IS_ERR(cs));
        rq->postfix = intel_ring_offset(rq, cs);
 
-       prev = __i915_request_add_to_timeline(rq);
-
-       list_add_tail(&rq->ring_link, &ring->request_list);
-       if (list_is_first(&rq->ring_link, &ring->request_list))
-               list_add(&ring->active_link, &rq->i915->gt.active_rings);
-       rq->emitted_jiffies = jiffies;
-
-       return prev;
+       return __i915_request_add_to_timeline(rq);
 }
 
 void __i915_request_queue(struct i915_request *rq,
@@ -1212,10 +1176,11 @@ void __i915_request_queue(struct i915_request *rq,
 void i915_request_add(struct i915_request *rq)
 {
        struct i915_sched_attr attr = rq->gem_context->sched;
+       struct intel_timeline * const tl = rq->timeline;
        struct i915_request *prev;
 
-       lockdep_assert_held(&rq->timeline->mutex);
-       lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+       lockdep_assert_held(&tl->mutex);
+       lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
        trace_i915_request_add(rq);
 
@@ -1266,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
         * work on behalf of others -- but instead we should benefit from
         * improved resource management. (Well, that's the theory at least.)
         */
-       if (prev && i915_request_completed(prev))
+       if (prev && i915_request_completed(prev) && prev->timeline == tl)
                i915_request_retire_upto(prev);
 
-       mutex_unlock(&rq->timeline->mutex);
+       mutex_unlock(&tl->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1489,18 +1454,43 @@ out:
 
 bool i915_retire_requests(struct drm_i915_private *i915)
 {
-       struct intel_ring *ring, *tmp;
+       struct intel_gt_timelines *timelines = &i915->gt.timelines;
+       struct intel_timeline *tl, *tn;
+       LIST_HEAD(free);
+
+       spin_lock(&timelines->lock);
+       list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+               if (!mutex_trylock(&tl->mutex))
+                       continue;
 
-       lockdep_assert_held(&i915->drm.struct_mutex);
+               intel_timeline_get(tl);
+               GEM_BUG_ON(!tl->active_count);
+               tl->active_count++; /* pin the list element */
+               spin_unlock(&timelines->lock);
 
-       list_for_each_entry_safe(ring, tmp,
-                                &i915->gt.active_rings, active_link) {
-               intel_ring_get(ring); /* last rq holds reference! */
-               ring_retire_requests(ring);
-               intel_ring_put(ring);
+               retire_requests(tl);
+
+               spin_lock(&timelines->lock);
+
+               /* Resume iteration after dropping lock */
+               list_safe_reset_next(tl, tn, link);
+               if (!--tl->active_count)
+                       list_del(&tl->link);
+
+               mutex_unlock(&tl->mutex);
+
+               /* Defer the final release to after the spinlock */
+               if (refcount_dec_and_test(&tl->kref.refcount)) {
+                       GEM_BUG_ON(tl->active_count);
+                       list_add(&tl->link, &free);
+               }
        }
+       spin_unlock(&timelines->lock);
+
+       list_for_each_entry_safe(tl, tn, &free, link)
+               __intel_timeline_free(&tl->kref);
 
-       return !list_empty(&i915->gt.active_rings);
+       return !list_empty(&timelines->active_list);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index fec1d5f..8ac6e12 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -223,9 +223,6 @@ struct i915_request {
        /** timeline->request entry for this request */
        struct list_head link;
 
-       /** ring->request_list entry for this request */
-       struct list_head ring_link;
-
        struct drm_i915_file_private *file_priv;
        /** file_priv list entry for this request */
        struct list_head client_link;