drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list

author Chris Wilson <chris@chris-wilson.co.uk>

Thu, 22 Feb 2018 09:25:44 +0000 (09:25 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 6 Mar 2018 12:12:45 +0000 (12:12 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 22 Feb 2018 09:25:44 +0000 (09:25 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 6 Mar 2018 12:12:45 +0000 (12:12 +0000)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h

index 74311fc..7d6eb82 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -44,8 +44,8 @@ struct intel_wait {
  };
  
  struct intel_signal_node {
-       struct rb_node node;
         struct intel_wait wait;
+       struct list_head link;
  };
  
  struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c

index 094f010..03bbc1d 100644 (file)
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -340,7 +340,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
         lockdep_assert_held(&b->rb_lock);
         GEM_BUG_ON(b->irq_wait == wait);
  
-       /* This request is completed, so remove it from the tree, mark it as
+       /*
+        * This request is completed, so remove it from the tree, mark it as
          * complete, and *then* wake up the associated task. N.B. when the
          * task wakes up, it will find the empty rb_node, discern that it
          * has already been removed from the tree and skip the serialisation
@@ -351,7 +352,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
         rb_erase(&wait->node, &b->waiters);
         RB_CLEAR_NODE(&wait->node);
  
-       wake_up_process(wait->tsk); /* implicit smp_wmb() */
+       if (wait->tsk->state != TASK_RUNNING)
+               wake_up_process(wait->tsk); /* implicit smp_wmb() */
  }
  
  static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
@@ -592,23 +594,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
         spin_unlock_irq(&b->rb_lock);
  }
  
-static bool signal_complete(const struct i915_request *request)
-{
-       if (!request)
-               return false;
-
-       /*
-        * Carefully check if the request is complete, giving time for the
-        * seqno to be visible or if the GPU hung.
-        */
-       return __i915_request_irq_complete(request);
-}
-
-static struct i915_request *to_signaler(struct rb_node *rb)
-{
-       return rb_entry(rb, struct i915_request, signaling.node);
-}
-
  static void signaler_set_rtpriority(void)
  {
          struct sched_param param = { .sched_priority = 1 };
@@ -616,78 +601,26 @@ static void signaler_set_rtpriority(void)
          sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
  }
  
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
-                                        struct i915_request *request)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-       lockdep_assert_held(&b->rb_lock);
-
-       /*
-        * Wake up all other completed waiters and select the
-        * next bottom-half for the next user interrupt.
-        */
-       __intel_engine_remove_wait(engine, &request->signaling.wait);
-
-       /*
-        * Find the next oldest signal. Note that as we have
-        * not been holding the lock, another client may
-        * have installed an even older signal than the one
-        * we just completed - so double check we are still
-        * the oldest before picking the next one.
-        */
-       if (request->signaling.wait.seqno) {
-               if (request == rcu_access_pointer(b->first_signal)) {
-                       struct rb_node *rb = rb_next(&request->signaling.node);
-                       rcu_assign_pointer(b->first_signal,
-                                          rb ? to_signaler(rb) : NULL);
-               }
-
-               rb_erase(&request->signaling.node, &b->signals);
-               request->signaling.wait.seqno = 0;
-       }
-}
-
-static struct i915_request *
-get_first_signal_rcu(struct intel_breadcrumbs *b)
-{
-       /*
-        * See the big warnings for i915_gem_active_get_rcu() and similarly
-        * for dma_fence_get_rcu_safe() that explain the intricacies involved
-        * here with defeating CPU/compiler speculation and enforcing
-        * the required memory barriers.
-        */
-       do {
-               struct i915_request *request;
-
-               request = rcu_dereference(b->first_signal);
-               if (request)
-                       request = i915_request_get_rcu(request);
-
-               barrier();
-
-               if (!request || request == rcu_access_pointer(b->first_signal))
-                       return rcu_pointer_handoff(request);
-
-               i915_request_put(request);
-       } while (1);
-}
-
  static int intel_breadcrumbs_signaler(void *arg)
  {
         struct intel_engine_cs *engine = arg;
         struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct i915_request *request;
+       struct i915_request *rq, *n;
  
         /* Install ourselves with high priority to reduce signalling latency */
         signaler_set_rtpriority();
  
         do {
                 bool do_schedule = true;
+               LIST_HEAD(list);
+               u32 seqno;
  
                 set_current_state(TASK_INTERRUPTIBLE);
+               if (list_empty(&b->signals))
+                       goto sleep;
  
-               /* We are either woken up by the interrupt bottom-half,
+               /*
+                * We are either woken up by the interrupt bottom-half,
                  * or by a client adding a new signaller. In both cases,
                  * the GPU seqno may have advanced beyond our oldest signal.
                  * If it has, propagate the signal, remove the waiter and
@@ -695,25 +628,45 @@ static int intel_breadcrumbs_signaler(void *arg)
                  * need to wait for a new interrupt from the GPU or for
                  * a new client.
                  */
-               rcu_read_lock();
-               request = get_first_signal_rcu(b);
-               rcu_read_unlock();
-               if (signal_complete(request)) {
-                       if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-                                     &request->fence.flags)) {
-                               local_bh_disable();
-                               dma_fence_signal(&request->fence);
-                               GEM_BUG_ON(!i915_request_completed(request));
-                               local_bh_enable(); /* kick start the tasklets */
-                       }
+               seqno = intel_engine_get_seqno(engine);
+
+               spin_lock_irq(&b->rb_lock);
+               list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
+                       u32 this = rq->signaling.wait.seqno;
+
+                       GEM_BUG_ON(!rq->signaling.wait.seqno);
  
-                       if (READ_ONCE(request->signaling.wait.seqno)) {
-                               spin_lock_irq(&b->rb_lock);
-                               __intel_engine_remove_signal(engine, request);
-                               spin_unlock_irq(&b->rb_lock);
+                       if (!i915_seqno_passed(seqno, this))
+                               break;
+
+                       if (likely(this == i915_request_global_seqno(rq))) {
+                               __intel_engine_remove_wait(engine,
+                                                          &rq->signaling.wait);
+
+                               rq->signaling.wait.seqno = 0;
+                               __list_del_entry(&rq->signaling.link);
+
+                               if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                             &rq->fence.flags)) {
+                                       list_add_tail(&rq->signaling.link,
+                                                     &list);
+                                       i915_request_get(rq);
+                               }
+                       }
+               }
+               spin_unlock_irq(&b->rb_lock);
+
+               if (!list_empty(&list)) {
+                       local_bh_disable();
+                       list_for_each_entry_safe(rq, n, &list, signaling.link) {
+                               dma_fence_signal(&rq->fence);
+                               GEM_BUG_ON(!i915_request_completed(rq));
+                               i915_request_put(rq);
                         }
+                       local_bh_enable(); /* kick start the tasklets */
  
-                       /* If the engine is saturated we may be continually
+                       /*
+                        * If the engine is saturated we may be continually
                          * processing completed requests. This angers the
                          * NMI watchdog if we never let anything else
                          * have access to the CPU. Let's pretend to be nice
@@ -722,9 +675,19 @@ static int intel_breadcrumbs_signaler(void *arg)
                          */
                         do_schedule = need_resched();
                 }
-               i915_request_put(request);
  
                 if (unlikely(do_schedule)) {
+                       /* Before we sleep, check for a missed seqno */
+                       if (current->state & TASK_NORMAL &&
+                           !list_empty(&b->signals) &&
+                           engine->irq_seqno_barrier &&
+                           test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+                                              &engine->irq_posted)) {
+                               engine->irq_seqno_barrier(engine);
+                               intel_engine_wakeup(engine);
+                       }
+
+sleep:
                         if (kthread_should_park())
                                 kthread_parkme();
  
@@ -739,13 +702,40 @@ static int intel_breadcrumbs_signaler(void *arg)
         return 0;
  }
  
+static void insert_signal(struct intel_breadcrumbs *b,
+                         struct i915_request *request,
+                         const u32 seqno)
+{
+       struct i915_request *iter;
+
+       lockdep_assert_held(&b->rb_lock);
+
+       /*
+        * A reasonable assumption is that we are called to add signals
+        * in sequence, as the requests are submitted for execution and
+        * assigned a global_seqno. This will be the case for the majority
+        * of internally generated signals (inter-engine signaling).
+        *
+        * Out of order waiters triggering random signaling enabling will
+        * be more problematic, but hopefully rare enough and the list
+        * small enough that the O(N) insertion sort is not an issue.
+        */
+
+       list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+               if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
+                       break;
+
+       list_add(&request->signaling.link, &iter->signaling.link);
+}
+
  void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
  {
         struct intel_engine_cs *engine = request->engine;
         struct intel_breadcrumbs *b = &engine->breadcrumbs;
         u32 seqno;
  
-       /* Note that we may be called from an interrupt handler on another
+       /*
+        * Note that we may be called from an interrupt handler on another
          * device (e.g. nouveau signaling a fence completion causing us
          * to submit a request, and so enable signaling). As such,
          * we need to make sure that all other users of b->rb_lock protect
@@ -757,17 +747,16 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
         lockdep_assert_held(&request->lock);
  
         seqno = i915_request_global_seqno(request);
-       if (!seqno)
+       if (!seqno) /* will be enabled later upon execution */
                 return;
  
-       spin_lock(&b->rb_lock);
-
         GEM_BUG_ON(request->signaling.wait.seqno);
         request->signaling.wait.tsk = b->signaler;
         request->signaling.wait.request = request;
         request->signaling.wait.seqno = seqno;
  
-       /* First add ourselves into the list of waiters, but register our
+       /*
+        * Add ourselves into the list of waiters, but register our
          * bottom-half as the signaller thread. As per usual, only the oldest
          * waiter (not just signaller) is tasked as the bottom-half waking
          * up all completed waiters after the user interrupt.
@@ -775,39 +764,9 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
          * If we are the oldest waiter, enable the irq (after which we
          * must double check that the seqno did not complete).
          */
+       spin_lock(&b->rb_lock);
+       insert_signal(b, request, seqno);
         wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
-
-       if (!__i915_request_completed(request, seqno)) {
-               struct rb_node *parent, **p;
-               bool first;
-
-               /* Now insert ourselves into the retirement ordered list of
-                * signals on this engine. We track the oldest seqno as that
-                * will be the first signal to complete.
-                */
-               parent = NULL;
-               first = true;
-               p = &b->signals.rb_node;
-               while (*p) {
-                       parent = *p;
-                       if (i915_seqno_passed(seqno,
-                                             to_signaler(parent)->signaling.wait.seqno)) {
-                               p = &parent->rb_right;
-                               first = false;
-                       } else {
-                               p = &parent->rb_left;
-                       }
-               }
-               rb_link_node(&request->signaling.node, parent, p);
-               rb_insert_color(&request->signaling.node, &b->signals);
-               if (first)
-                       rcu_assign_pointer(b->first_signal, request);
-       } else {
-               __intel_engine_remove_wait(engine, &request->signaling.wait);
-               request->signaling.wait.seqno = 0;
-               wakeup = false;
-       }
-
         spin_unlock(&b->rb_lock);
  
         if (wakeup)
@@ -816,17 +775,20 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
  
  void intel_engine_cancel_signaling(struct i915_request *request)
  {
+       struct intel_engine_cs *engine = request->engine;
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
         GEM_BUG_ON(!irqs_disabled());
         lockdep_assert_held(&request->lock);
  
-       if (READ_ONCE(request->signaling.wait.seqno)) {
-               struct intel_engine_cs *engine = request->engine;
-               struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       if (!READ_ONCE(request->signaling.wait.seqno))
+               return;
  
-               spin_lock(&b->rb_lock);
-               __intel_engine_remove_signal(engine, request);
-               spin_unlock(&b->rb_lock);
-       }
+       spin_lock(&b->rb_lock);
+       __intel_engine_remove_wait(engine, &request->signaling.wait);
+       if (fetch_and_zero(&request->signaling.wait.seqno))
+               __list_del_entry(&request->signaling.link);
+       spin_unlock(&b->rb_lock);
  }
  
  int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -840,6 +802,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
         timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
         timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
  
+       INIT_LIST_HEAD(&b->signals);
+
         /* Spawn a thread to provide a common bottom-half for all signals.
          * As this is an asynchronous interface we cannot steal the current
          * task for handling the bottom-half to the user interrupt, therefore
@@ -899,8 +863,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
         /* The engines should be idle and all requests accounted for! */
         WARN_ON(READ_ONCE(b->irq_wait));
         WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-       WARN_ON(rcu_access_pointer(b->first_signal));
-       WARN_ON(!RB_EMPTY_ROOT(&b->signals));
+       WARN_ON(!list_empty(&b->signals));
  
         if (!IS_ERR_OR_NULL(b->signaler))
                 kthread_stop(b->signaler);
@@ -913,20 +876,22 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
         struct intel_breadcrumbs *b = &engine->breadcrumbs;
         bool busy = false;
  
-       spin_lock_irq(&b->rb_lock);
-
         if (b->irq_wait) {
-               wake_up_process(b->irq_wait->tsk);
-               busy = true;
+               spin_lock_irq(&b->irq_lock);
+
+               if (b->irq_wait) {
+                       wake_up_process(b->irq_wait->tsk);
+                       busy = true;
+               }
+
+               spin_unlock_irq(&b->irq_lock);
         }
  
-       if (rcu_access_pointer(b->first_signal)) {
+       if (!busy && !list_empty(&b->signals)) {
                 wake_up_process(b->signaler);
                 busy = true;
         }
  
-       spin_unlock_irq(&b->rb_lock);
-
         return busy;
  }
  
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

index 90e4380..e7526a4 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -356,9 +356,9 @@ struct intel_engine_cs {
  
                 spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
                 struct rb_root waiters; /* sorted by retirement, priority */
-               struct rb_root signals; /* sorted by retirement */
+               struct list_head signals; /* sorted by retirement */
                 struct task_struct *signaler; /* used for fence signalling */
-               struct i915_request __rcu *first_signal;
+
                 struct timer_list fake_irq; /* used after a missed interrupt */
                 struct timer_list hangcheck; /* detect missed interrupts */
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 22 Feb 2018 09:25:44 +0000 (09:25 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 6 Mar 2018 12:12:45 +0000 (12:12 +0000)
drivers/gpu/drm/i915/i915_request.h		patch \| blob \| history
drivers/gpu/drm/i915/intel_breadcrumbs.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_ringbuffer.h		patch \| blob \| history