drm/i915/hangcheck: Track context changes
author Chris Wilson <chris@chris-wilson.co.uk>
Wed, 1 May 2019 11:45:28 +0000 (12:45 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 3 May 2019 10:47:23 +0000 (11:47 +0100)
Given sufficient preemption, we may see a busy system that doesn't
advance seqno while performing work across multiple contexts, and given
sufficient pathology not even notice a change in ACTHD. What does change
between the preempting contexts is their RING, so take note of that and
treat a change in the ring address as being an indication of forward
progress.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190501114541.10077-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_hangcheck.c

index 9d64e33..c0ab11b 100644 (file)
@@ -53,6 +53,7 @@ struct intel_instdone {
 
 struct intel_engine_hangcheck {
        u64 acthd;
+       u32 last_ring;
        u32 last_seqno;
        u32 next_seqno;
        unsigned long action_timestamp;
index e5eaa06..721ab74 100644 (file)
@@ -27,6 +27,7 @@
 
 struct hangcheck {
        u64 acthd;
+       u32 ring;
        u32 seqno;
        enum intel_engine_hangcheck_action action;
        unsigned long action_timestamp;
@@ -134,6 +135,7 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
 {
        hc->acthd = intel_engine_get_active_head(engine);
        hc->seqno = intel_engine_get_hangcheck_seqno(engine);
+       hc->ring = ENGINE_READ(engine, RING_START);
 }
 
 static void hangcheck_store_sample(struct intel_engine_cs *engine,
@@ -141,18 +143,22 @@ static void hangcheck_store_sample(struct intel_engine_cs *engine,
 {
        engine->hangcheck.acthd = hc->acthd;
        engine->hangcheck.last_seqno = hc->seqno;
+       engine->hangcheck.last_ring = hc->ring;
 }
 
 static enum intel_engine_hangcheck_action
 hangcheck_get_action(struct intel_engine_cs *engine,
                     const struct hangcheck *hc)
 {
-       if (engine->hangcheck.last_seqno != hc->seqno)
-               return ENGINE_ACTIVE_SEQNO;
-
        if (intel_engine_is_idle(engine))
                return ENGINE_IDLE;
 
+       if (engine->hangcheck.last_ring != hc->ring)
+               return ENGINE_ACTIVE_SEQNO;
+
+       if (engine->hangcheck.last_seqno != hc->seqno)
+               return ENGINE_ACTIVE_SEQNO;
+
        return engine_stuck(engine, hc->acthd);
 }