i915/guc/reset: Make __guc_reset_context aware of guilty engines
author Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Tue, 26 Apr 2022 00:30:45 +0000 (17:30 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Thu, 12 May 2022 18:42:37 +0000 (11:42 -0700)
There are 2 ways an engine can get reset in i915 and the method of reset
affects how KMD labels a context as guilty/innocent.

(1) GuC initiated engine-reset: GuC resets a hung engine and notifies
KMD. The context that hung on the engine is marked guilty and all other
contexts are innocent. The innocent contexts are resubmitted.

(2) GT-based reset: When an engine heartbeat fails to tick, KMD
initiates a gt/chip reset. All active contexts are marked as guilty and
discarded.

In order to correctly mark the contexts as guilty/innocent, pass
__guc_reset_context a mask of the engines that were reset.

Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220426003045.3929439-1-umesh.nerlige.ramappa@intel.com
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_uc.c
drivers/gpu/drm/i915/gt/uc/intel_uc.h

index 894f17f..11bf33f 100644 (file)
@@ -808,7 +808,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
                __intel_engine_reset(engine, stalled_mask & engine->mask);
        local_bh_enable();
 
-       intel_uc_reset(&gt->uc, true);
+       intel_uc_reset(&gt->uc, ALL_ENGINES);
 
        intel_ggtt_restore_fences(gt->ggtt);
 
index 3f3373f..966e69a 100644 (file)
@@ -443,7 +443,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc);
 void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled);
 void intel_guc_submission_reset_finish(struct intel_guc *guc);
 void intel_guc_submission_cancel_requests(struct intel_guc *guc);
 
index 8bf8b6d..5a1dfac 100644 (file)
@@ -1654,9 +1654,9 @@ __unwind_incomplete_requests(struct intel_context *ce)
        spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
-static void __guc_reset_context(struct intel_context *ce, bool stalled)
+static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
 {
-       bool local_stalled;
+       bool guilty;
        struct i915_request *rq;
        unsigned long flags;
        u32 head;
@@ -1684,7 +1684,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
                if (!intel_context_is_pinned(ce))
                        goto next_context;
 
-               local_stalled = false;
+               guilty = false;
                rq = intel_context_find_active_request(ce);
                if (!rq) {
                        head = ce->ring->tail;
@@ -1692,14 +1692,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
                }
 
                if (i915_request_started(rq))
-                       local_stalled = true;
+                       guilty = stalled & ce->engine->mask;
 
                GEM_BUG_ON(i915_active_is_idle(&ce->active));
                head = intel_ring_wrap(ce->ring, rq->head);
 
-               __i915_request_reset(rq, local_stalled && stalled);
+               __i915_request_reset(rq, guilty);
 out_replay:
-               guc_reset_state(ce, head, local_stalled && stalled);
+               guc_reset_state(ce, head, guilty);
 next_context:
                if (i != number_children)
                        ce = list_next_entry(ce, parallel.child_link);
@@ -1709,7 +1709,7 @@ next_context:
        intel_context_put(parent);
 }
 
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
 {
        struct intel_context *ce;
        unsigned long index;
@@ -4228,7 +4228,7 @@ static void guc_context_replay(struct intel_context *ce)
 {
        struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
 
-       __guc_reset_context(ce, true);
+       __guc_reset_context(ce, ce->engine->mask);
        tasklet_hi_schedule(&sched_engine->tasklet);
 }
 
index ecf149c..3c3527c 100644 (file)
@@ -597,7 +597,7 @@ sanitize:
        __uc_sanitize(uc);
 }
 
-void intel_uc_reset(struct intel_uc *uc, bool stalled)
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
 {
        struct intel_guc *guc = &uc->guc;
 
index 866b462..a8f38c2 100644 (file)
@@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_driver_remove(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
-void intel_uc_reset(struct intel_uc *uc, bool stalled);
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
 void intel_uc_reset_finish(struct intel_uc *uc);
 void intel_uc_cancel_requests(struct intel_uc *uc);
 void intel_uc_suspend(struct intel_uc *uc);