From 303760aa914b7f5ac9602dbb4b471a2ad52eeb3e Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 25 Apr 2022 17:30:45 -0700 Subject: [PATCH] i915/guc/reset: Make __guc_reset_context aware of guilty engines There are 2 ways an engine can get reset in i915 and the method of reset affects how KMD labels a context as guilty/innocent. (1) GuC initiated engine-reset: GuC resets a hung engine and notifies KMD. The context that hung on the engine is marked guilty and all other contexts are innocent. The innocent contexts are resubmitted. (2) GT based reset: When an engine heartbeat fails to tick, KMD initiates a gt/chip reset. All active contexts are marked as guilty and discarded. In order to correctly mark the contexts as guilty/innocent, pass a mask of engines that were reset to __guc_reset_context. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220426003045.3929439-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 16 ++++++++-------- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 894f17f..11bf33f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -808,7 +808,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) __intel_engine_reset(engine, stalled_mask & engine->mask); local_bh_enable(); - intel_uc_reset(>->uc, true); + intel_uc_reset(>->uc, ALL_ENGINES); intel_ggtt_restore_fences(gt->ggtt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 3f3373f..966e69a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -443,7 +443,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc); void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq); void intel_guc_submission_reset_prepare(struct intel_guc *guc); -void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); +void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled); void intel_guc_submission_reset_finish(struct intel_guc *guc); void intel_guc_submission_cancel_requests(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 8bf8b6d..5a1dfac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1654,9 +1654,9 @@ __unwind_incomplete_requests(struct intel_context *ce) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void __guc_reset_context(struct intel_context *ce, bool stalled) +static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) { - bool local_stalled; + bool guilty; struct i915_request *rq; unsigned long flags; u32 head; @@ -1684,7 +1684,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) if (!intel_context_is_pinned(ce)) goto next_context; - local_stalled = false; + guilty = false; rq = intel_context_find_active_request(ce); if (!rq) { head = ce->ring->tail; @@ -1692,14 +1692,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) } if (i915_request_started(rq)) - local_stalled = true; + guilty = stalled & ce->engine->mask; GEM_BUG_ON(i915_active_is_idle(&ce->active)); head = intel_ring_wrap(ce->ring, rq->head); - __i915_request_reset(rq, local_stalled && stalled); + __i915_request_reset(rq, guilty); out_replay: - guc_reset_state(ce, head, local_stalled && stalled); + guc_reset_state(ce, head, guilty); next_context: if (i != number_children) ce = list_next_entry(ce, parallel.child_link); @@ -1709,7 +1709,7 @@ next_context: intel_context_put(parent); } -void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) +void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) { struct intel_context *ce; unsigned long index; @@ -4228,7 +4228,7 @@ static void guc_context_replay(struct intel_context *ce) { struct i915_sched_engine *sched_engine = ce->engine->sched_engine; - __guc_reset_context(ce, true); + __guc_reset_context(ce, ce->engine->mask); tasklet_hi_schedule(&sched_engine->tasklet); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index ecf149c..3c3527c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -597,7 +597,7 @@ sanitize: __uc_sanitize(uc); } -void intel_uc_reset(struct intel_uc *uc, bool stalled) +void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled) { struct intel_guc *guc = &uc->guc; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 866b462..a8f38c2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); -void intel_uc_reset(struct intel_uc *uc, bool stalled); +void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled); void intel_uc_reset_finish(struct intel_uc *uc); void intel_uc_cancel_requests(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); -- 2.7.4