drm/i915/gt: Use a mmio read of the CSB in case of failure
authorChris Wilson <chris@chris-wilson.co.uk>
Tue, 15 Sep 2020 13:49:23 +0000 (14:49 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Tue, 15 Sep 2020 14:33:54 +0000 (15:33 +0100)
If we find the GPU didn't update the CSB within 50us, we currently fail
and eventually reset the GPU. Let's report the value from the mmio space
as a last resort; it may just stave off an unnecessary GPU reset.

References: HSDES#22011327657
Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-4-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_lrc_reg.h

index fcb6ec3..0d57b54 100644 (file)
@@ -2528,19 +2528,42 @@ static inline bool gen8_csb_parse(const u64 csb)
        return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
 
-static noinline u64 wa_csb_read(u64 * const csb)
+static noinline u64
+wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
 {
        u64 entry;
 
+       /*
+        * Reading from the HWSP has one particular advantage: we can detect
+        * a stale entry. Since the write into HWSP is broken, we have no reason
+        * to trust the HW at all, the mmio entry may equally be unordered, so
+        * we prefer the path that is self-checking and as a last resort,
+        * return the mmio value.
+        *
+        * tgl,dg1:HSDES#22011327657
+        */
        preempt_disable();
-       if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-               GEM_WARN_ON("50us CSB timeout");
+       if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
+               int idx = csb - engine->execlists.csb_status;
+               int status;
+
+               status = GEN8_EXECLISTS_STATUS_BUF;
+               if (idx >= 6) {
+                       status = GEN11_EXECLISTS_STATUS_BUF2;
+                       idx -= 6;
+               }
+               status += sizeof(u64) * idx;
+
+               entry = intel_uncore_read64(engine->uncore,
+                                           _MMIO(engine->mmio_base + status));
+       }
        preempt_enable();
 
        return entry;
 }
 
-static inline u64 csb_read(u64 * const csb)
+static inline u64
+csb_read(const struct intel_engine_cs *engine, u64 * const csb)
 {
        u64 entry = READ_ONCE(*csb);
 
@@ -2556,7 +2579,7 @@ static inline u64 csb_read(u64 * const csb)
         * tgl:HSDES#22011248461
         */
        if (unlikely(entry == -1))
-               entry = wa_csb_read(csb);
+               entry = wa_csb_read(engine, csb);
 
        /* Consume this entry so that we can spot its future reuse. */
        WRITE_ONCE(*csb, -1);
@@ -2649,7 +2672,7 @@ static void process_csb(struct intel_engine_cs *engine)
                 * status notifier.
                 */
 
-               csb = csb_read(buf + head);
+               csb = csb_read(engine, buf + head);
                ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
                             head, upper_32_bits(csb), lower_32_bits(csb));
 
index 93cb6c4..1b51f7b 100644 (file)
@@ -49,4 +49,7 @@
 #define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT      0x1A
 #define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT      0xD
 
+#define GEN8_EXECLISTS_STATUS_BUF 0x370
+#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
+
 #endif /* _INTEL_LRC_REG_H_ */