drm/i915: Remove i915_request.lock requirement for execution callbacks

author Chris Wilson <chris@chris-wilson.co.uk>

Thu, 16 Jul 2020 14:22:07 +0000 (15:22 +0100)

committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Mon, 7 Sep 2020 12:08:06 +0000 (15:08 +0300)
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 16 Jul 2020 14:22:07 +0000 (15:22 +0100)
committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Mon, 7 Sep 2020 12:08:06 +0000 (15:08 +0300)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 4f7f67a..11e2724 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -187,30 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
         irq_execute_cb(wrk);
  }
  
-static void __notify_execute_cb(struct i915_request *rq)
+static __always_inline void
+__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
  {
         struct execute_cb *cb, *cn;
  
-       lockdep_assert_held(&rq->lock);
-
-       GEM_BUG_ON(!i915_request_is_active(rq));
         if (llist_empty(&rq->execute_cb))
                 return;
  
-       llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
-               irq_work_queue(&cb->work);
+       llist_for_each_entry_safe(cb, cn,
+                                 llist_del_all(&rq->execute_cb),
+                                 work.llnode)
+               fn(&cb->work);
+}
  
-       /*
-        * XXX Rollback on __i915_request_unsubmit()
-        *
-        * In the future, perhaps when we have an active time-slicing scheduler,
-        * it will be interesting to unsubmit parallel execution and remove
-        * busywaits from the GPU until their master is restarted. This is
-        * quite hairy, we have to carefully rollback the fence and do a
-        * preempt-to-idle cycle on the target engine, all the while the
-        * master execute_cb may refire.
-        */
-       init_llist_head(&rq->execute_cb);
+static void __notify_execute_cb_irq(struct i915_request *rq)
+{
+       __notify_execute_cb(rq, irq_work_queue);
+}
+
+static bool irq_work_imm(struct irq_work *wrk)
+{
+       wrk->func(wrk);
+       return false;
+}
+
+static void __notify_execute_cb_imm(struct i915_request *rq)
+{
+       __notify_execute_cb(rq, irq_work_imm);
  }
  
  static void free_capture_list(struct i915_request *request)
@@ -257,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq)
                 locked = engine;
         }
         list_del_init(&rq->sched.link);
+
         clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
         clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+
+       /* Prevent further __await_execution() registering a cb, then flush */
+       set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+
         spin_unlock_irq(&locked->active.lock);
+
+       __notify_execute_cb_imm(rq);
  }
  
  bool i915_request_retire(struct i915_request *rq)
@@ -271,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq)
  
         GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
         trace_i915_request_retire(rq);
+       i915_request_mark_complete(rq);
  
         /*
          * We know the GPU must have read the request to have
@@ -288,15 +300,6 @@ bool i915_request_retire(struct i915_request *rq)
                 __i915_request_fill(rq, POISON_FREE);
         rq->ring->head = rq->postfix;
  
-       /*
-        * We only loosely track inflight requests across preemption,
-        * and so we may find ourselves attempting to retire a _completed_
-        * request that we have removed from the HW and put back on a run
-        * queue.
-        */
-       remove_from_engine(rq);
-
-       i915_request_mark_complete(rq);
         if (!i915_request_signaled(rq)) {
                 spin_lock_irq(&rq->lock);
                 dma_fence_signal_locked(&rq->fence);
@@ -320,7 +323,6 @@ bool i915_request_retire(struct i915_request *rq)
          */
         remove_from_engine(rq);
         GEM_BUG_ON(!llist_empty(&rq->execute_cb));
-       spin_unlock_irq(&rq->lock);
  
         __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
  
@@ -348,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq)
         } while (i915_request_retire(tmp) && tmp != rq);
  }
  
-static void __llist_add(struct llist_node *node, struct llist_head *head)
-{
-       node->next = head->first;
-       head->first = node;
-}
-
  static struct i915_request * const *
  __engine_active(struct intel_engine_cs *engine)
  {
@@ -451,18 +447,24 @@ __await_execution(struct i915_request *rq,
                 cb->work.func = irq_execute_cb_hook;
         }
  
-       spin_lock_irq(&signal->lock);
-       if (i915_request_is_active(signal) || __request_in_flight(signal)) {
-               if (hook) {
-                       hook(rq, &signal->fence);
-                       i915_request_put(signal);
-               }
-               i915_sw_fence_complete(cb->fence);
-               kmem_cache_free(global.slab_execute_cbs, cb);
-       } else {
-               __llist_add(&cb->work.llnode, &signal->execute_cb);
+       /*
+        * Register the callback first, then see if the signaler is already
+        * active. This ensures that if we race with the
+        * __notify_execute_cb from i915_request_submit() and we are not
+        * included in that list, we get a second bite of the cherry and
+        * execute it ourselves. After this point, a future
+        * i915_request_submit() will notify us.
+        *
+        * In i915_request_retire() we set the ACTIVE bit on a completed
+        * request (then flush the execute_cb). So by registering the
+        * callback first, then checking the ACTIVE bit, we serialise with
+        * the completed/retired request.
+        */
+       if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
+               if (i915_request_is_active(signal) ||
+                   __request_in_flight(signal))
+                       __notify_execute_cb_imm(signal);
         }
-       spin_unlock_irq(&signal->lock);
  
         return 0;
  }
@@ -588,10 +590,19 @@ xfer:
          * preempt-to-idle cycle on the target engine, all the while the
          * master execute_cb may refire.
          */
-       __notify_execute_cb(request);
+       __notify_execute_cb_irq(request);
  
-       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-               i915_request_enable_breadcrumb(request);
+       /* We may be recursing from the signal callback of another i915 fence */
+       if (!i915_request_signaled(request)) {
+               spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+               if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+                            &request->fence.flags) &&
+                   !i915_request_enable_breadcrumb(request))
+                       intel_engine_signal_breadcrumbs(engine);
+
+               spin_unlock(&request->lock);
+       }
  
         return result;
  }
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 16 Jul 2020 14:22:07 +0000 (15:22 +0100)
committer	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
	Mon, 7 Sep 2020 12:08:06 +0000 (15:08 +0300)