drm/i915/gt: Restore ce->signal flush before releasing virtual engine
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 8 Jan 2021 20:40:23 +0000 (20:40 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 8 Jan 2021 21:35:55 +0000 (21:35 +0000)
Before we mark the virtual engine as no longer inflight, flush any
ongoing signaling that may be using the ce->signal_link along the
previous breadcrumbs. On switch to a new physical engine, that link will
be inserted into the new set of breadcrumbs, causing confusion to an
ongoing iterator.

This patch undoes a last minute mistake introduced into commit
bab0557c8dca ("drm/i915/gt: Remove virtual breadcrumb before transfer"),
whereby instead of unconditionally applying the flush, it was only
applied if the request itself was going to be reused.

v2: Generalise and cancel all remaining ce->signals

Fixes: bab0557c8dca ("drm/i915/gt: Remove virtual breadcrumb before transfer")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Andi Shyti <andi.shyti@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210108204026.20682-4-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
drivers/gpu/drm/i915/gt/intel_execlists_submission.c

index 2eabb9ab5d47c7c5cdca1f135968d481a63f7f71..7137b6f24f550fb402b61d218bbda058cf5b433f 100644 (file)
@@ -472,6 +472,39 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
        i915_request_put(rq);
 }
 
+void intel_context_remove_breadcrumbs(struct intel_context *ce,
+                                     struct intel_breadcrumbs *b)
+{
+       struct i915_request *rq, *rn;
+       bool release = false;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ce->signal_lock, flags);
+
+       if (list_empty(&ce->signals))
+               goto unlock;
+
+       list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
+               GEM_BUG_ON(!__i915_request_is_complete(rq));
+               if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
+                                       &rq->fence.flags))
+                       continue;
+
+               list_del_rcu(&rq->signal_link);
+               irq_signal_request(rq, b);
+               i915_request_put(rq);
+       }
+       release = remove_signaling_context(b, ce);
+
+unlock:
+       spin_unlock_irqrestore(&ce->signal_lock, flags);
+       if (release)
+               intel_context_put(ce);
+
+       while (atomic_read(&b->signaler_active))
+               cpu_relax();
+}
+
 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
 {
        struct intel_context *ce;
index 75cc9cff3ae3cccba42e33feccf0cf42f6fa2d0a..3ce5ce270b0446797e7f865f78af5a66bf847928 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef __INTEL_BREADCRUMBS__
 #define __INTEL_BREADCRUMBS__
 
+#include <linux/atomic.h>
 #include <linux/irq_work.h>
 
 #include "intel_engine_types.h"
@@ -44,4 +45,7 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
 bool i915_request_enable_breadcrumb(struct i915_request *request);
 void i915_request_cancel_breadcrumb(struct i915_request *request);
 
+void intel_context_remove_breadcrumbs(struct intel_context *ce,
+                                     struct intel_breadcrumbs *b);
+
 #endif /* __INTEL_BREADCRUMBS__ */
index 2f8e10450f7e42bd7975b1e3368eba6cb8fbee3f..eb69eef9d7db8d6025ff2e4f804678a374133093 100644 (file)
@@ -581,21 +581,6 @@ resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
 {
        struct intel_engine_cs *engine = rq->engine;
 
-       /* Flush concurrent rcu iterators in signal_irq_work */
-       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) {
-               /*
-                * After this point, the rq may be transferred to a new
-                * sibling, so before we clear ce->inflight make sure that
-                * the context has been removed from the b->signalers and
-                * furthermore we need to make sure that the concurrent
-                * iterator in signal_irq_work is no longer following
-                * ce->signal_link.
-                */
-               i915_request_cancel_breadcrumb(rq);
-               while (atomic_read(&engine->breadcrumbs->signaler_active))
-                       cpu_relax();
-       }
-
        spin_lock_irq(&engine->active.lock);
 
        clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
@@ -610,6 +595,16 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
        struct intel_engine_cs *engine = rq->engine;
 
+       /*
+        * After this point, the rq may be transferred to a new sibling, so
+        * before we clear ce->inflight make sure that the context has been
+        * removed from the b->signalers and furthermore we need to make sure
+        * that the concurrent iterator in signal_irq_work is no longer
+        * following ce->signal_link.
+        */
+       if (!list_empty(&ce->signals))
+               intel_context_remove_breadcrumbs(ce, engine->breadcrumbs);
+
        /*
         * This engine is now too busy to run this virtual request, so
         * see if we can find an alternative engine for it to execute on.