drm/i915/gt: Fix hold/unhold recursion
author Chris Wilson <chris@chris-wilson.co.uk>
Sun, 9 Feb 2020 13:19:15 +0000 (13:19 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Sun, 9 Feb 2020 13:46:40 +0000 (13:46 +0000)
In eliminating the recursion from walking the tree of signalers/waiters
for processing the hold/unhold operations, a crucial error crept in
where we looked at the parent request and not the list element when
processing the list.

Brown paper bag, much?

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1166
Fixes: 32ff621fd744 ("drm/i915/gt: Allow temporary suspension of inflight requests")
Fixes: 748317386afb ("drm/i915/execlists: Offline error capture")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200209131922.180287-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_lrc.c

index 4bedc66..2138507 100644 (file)
@@ -2374,10 +2374,10 @@ static void __execlists_hold(struct i915_request *rq)
                if (i915_request_is_active(rq))
                        __i915_request_unsubmit(rq);
 
-               RQ_TRACE(rq, "on hold\n");
                clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
                list_move_tail(&rq->sched.link, &rq->engine->active.hold);
                i915_request_set_hold(rq);
+               RQ_TRACE(rq, "on hold\n");
 
                for_each_waiter(p, rq) {
                        struct i915_request *w =
@@ -2393,7 +2393,7 @@ static void __execlists_hold(struct i915_request *rq)
                        if (i915_request_completed(w))
                                continue;
 
-                       if (i915_request_on_hold(rq))
+                       if (i915_request_on_hold(w))
                                continue;
 
                        list_move_tail(&w->sched.link, &list);
@@ -2451,6 +2451,7 @@ static bool execlists_hold(struct intel_engine_cs *engine,
        GEM_BUG_ON(i915_request_on_hold(rq));
        GEM_BUG_ON(rq->engine != engine);
        __execlists_hold(rq);
+       GEM_BUG_ON(list_empty(&engine->active.hold));
 
 unlock:
        spin_unlock_irq(&engine->active.lock);
@@ -2486,6 +2487,8 @@ static void __execlists_unhold(struct i915_request *rq)
        do {
                struct i915_dependency *p;
 
+               RQ_TRACE(rq, "hold release\n");
+
                GEM_BUG_ON(!i915_request_on_hold(rq));
                GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 
@@ -2494,7 +2497,6 @@ static void __execlists_unhold(struct i915_request *rq)
                               i915_sched_lookup_priolist(rq->engine,
                                                          rq_prio(rq)));
                set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-               RQ_TRACE(rq, "hold release\n");
 
                /* Also release any children on this engine that are ready */
                for_each_waiter(p, rq) {
@@ -2504,11 +2506,11 @@ static void __execlists_unhold(struct i915_request *rq)
                        if (w->engine != rq->engine)
                                continue;
 
-                       if (!i915_request_on_hold(rq))
+                       if (!i915_request_on_hold(w))
                                continue;
 
                        /* Check that no other parents are also on hold */
-                       if (hold_request(rq))
+                       if (hold_request(w))
                                continue;
 
                        list_move_tail(&w->sched.link, &list);
@@ -2806,6 +2808,7 @@ static void execlists_submit_request(struct i915_request *request)
        spin_lock_irqsave(&engine->active.lock, flags);
 
        if (unlikely(ancestor_on_hold(engine, request))) {
+               RQ_TRACE(request, "ancestor on hold\n");
                list_add_tail(&request->sched.link, &engine->active.hold);
                i915_request_set_hold(request);
        } else {