drm/i915/gt: Apply the CSB w/a for all

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 15 Sep 2020 13:49:22 +0000 (14:49 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 15 Sep 2020 14:33:54 +0000 (15:33 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 15 Sep 2020 13:49:22 +0000 (14:49 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 15 Sep 2020 14:33:54 +0000 (15:33 +0100)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index d75712a..fcb6ec3 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
   *     bits 47-57: sw context id of the lrc the GT switched away from
   *     bits 58-63: sw counter of the lrc the GT switched away from
   */
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
  {
-       bool ctx_away_valid;
-       bool new_queue;
-       u64 entry;
-
-       /* HSD#22011248461 */
-       entry = READ_ONCE(*csb);
-       if (unlikely(entry == -1)) {
-               preempt_disable();
-               if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-                       GEM_WARN_ON("50us CSB timeout");
-               preempt_enable();
-       }
-       WRITE_ONCE(*(u64 *)csb, -1);
-
-       ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-       new_queue =
-               lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+       bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+       bool new_queue =
+               lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
  
         /*
          * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
          * would require some extra handling, but we don't support that.
          */
         if (!ctx_away_valid || new_queue) {
-               GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+               GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
                 return true;
         }
  
@@ -2533,19 +2519,56 @@ static inline bool gen12_csb_parse(const u64 *csb)
          * context switch on an unsuccessful wait instruction since we always
          * use polling mode.
          */
-       GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+       GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
         return false;
  }
  
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+       return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static noinline u64 wa_csb_read(u64 * const csb)
  {
-       return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+       u64 entry;
+
+       preempt_disable();
+       if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+               GEM_WARN_ON("50us CSB timeout");
+       preempt_enable();
+
+       return entry;
+}
+
+static inline u64 csb_read(u64 * const csb)
+{
+       u64 entry = READ_ONCE(*csb);
+
+       /*
+        * Unfortunately, the GPU does not always serialise its write
+        * of the CSB entries before its write of the CSB pointer, at least
+        * from the perspective of the CPU, using what is known as a Global
+        * Observation Point. We may read a new CSB tail pointer, but then
+        * read the stale CSB entries, causing us to misinterpret the
+        * context-switch events, and eventually declare the GPU hung.
+        *
+        * icl:HSDES#1806554093
+        * tgl:HSDES#22011248461
+        */
+       if (unlikely(entry == -1))
+               entry = wa_csb_read(csb);
+
+       /* Consume this entry so that we can spot its future reuse. */
+       WRITE_ONCE(*csb, -1);
+
+       /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
+       return entry;
  }
  
  static void process_csb(struct intel_engine_cs *engine)
  {
         struct intel_engine_execlists * const execlists = &engine->execlists;
-       const u64 * const buf = execlists->csb_status;
+       u64 * const buf = execlists->csb_status;
         const u8 num_entries = execlists->csb_size;
         u8 head, tail;
  
@@ -2603,6 +2626,7 @@ static void process_csb(struct intel_engine_cs *engine)
         rmb();
         do {
                 bool promote;
+               u64 csb;
  
                 if (++head == num_entries)
                         head = 0;
@@ -2625,15 +2649,14 @@ static void process_csb(struct intel_engine_cs *engine)
                  * status notifier.
                  */
  
+               csb = csb_read(buf + head);
                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-                            head,
-                            upper_32_bits(buf[head]),
-                            lower_32_bits(buf[head]));
+                            head, upper_32_bits(csb), lower_32_bits(csb));
  
                 if (INTEL_GEN(engine->i915) >= 12)
-                       promote = gen12_csb_parse(buf + head);
+                       promote = gen12_csb_parse(csb);
                 else
-                       promote = gen8_csb_parse(buf + head);
+                       promote = gen8_csb_parse(csb);
                 if (promote) {
                         struct i915_request * const *old = execlists->active;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 15 Sep 2020 13:49:22 +0000 (14:49 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 15 Sep 2020 14:33:54 +0000 (15:33 +0100)