drm/i915/guc: Don't abort on CTB_UNUSED status
author John Harrison <John.C.Harrison@Intel.com>
Thu, 28 Jul 2022 02:42:25 +0000 (19:42 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Fri, 29 Jul 2022 17:35:59 +0000 (10:35 -0700)
When the KMD sends a CLIENT_RESET request to GuC (as part of the
suspend sequence), GuC will mark the CTB buffer as 'UNUSED'. If the
KMD then checked the CTB queue, it would see a non-zero status value
and report the buffer as corrupted.

Technically, no G2H messages should be received once the CLIENT_RESET
has been sent. However, if a context was outstanding on an engine then
it would get reset and a reset notification would be sent. So, don't
actually treat UNUSED as a catastrophic error. Just flag it up as
unexpected and keep going.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728024225.2363663-7-John.C.Harrison@Intel.com
drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c

index df83c1c..28b8387 100644 (file)
@@ -37,6 +37,7 @@
  *  |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
  *  |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
  *  |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
+ *  |   |       |   - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use)         |
  *  +---+-------+--------------------------------------------------------------+
  *  |...|       | RESERVED = MBZ                                               |
  *  +---+-------+--------------------------------------------------------------+
@@ -49,9 +50,10 @@ struct guc_ct_buffer_desc {
        u32 tail;
        u32 status;
 #define GUC_CTB_STATUS_NO_ERROR                                0
-#define GUC_CTB_STATUS_OVERFLOW                                (1 << 0)
-#define GUC_CTB_STATUS_UNDERFLOW                       (1 << 1)
-#define GUC_CTB_STATUS_MISMATCH                                (1 << 2)
+#define GUC_CTB_STATUS_OVERFLOW                                BIT(0)
+#define GUC_CTB_STATUS_UNDERFLOW                       BIT(1)
+#define GUC_CTB_STATUS_MISMATCH                                BIT(2)
+#define GUC_CTB_STATUS_UNUSED                          BIT(3)
        u32 reserved[13];
 } __packed;
 static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
index 74194c1..2b22065 100644 (file)
@@ -829,8 +829,22 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
        if (unlikely(ctb->broken))
                return -EPIPE;
 
-       if (unlikely(desc->status))
-               goto corrupted;
+       if (unlikely(desc->status)) {
+               u32 status = desc->status;
+
+               if (status & GUC_CTB_STATUS_UNUSED) {
+                       /*
+                        * Potentially valid if a CLIENT_RESET request resulted in
+                        * contexts/engines being reset. But should never happen as
+                        * no contexts should be active when CLIENT_RESET is sent.
+                        */
+                       CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
+                       status &= ~GUC_CTB_STATUS_UNUSED;
+               }
+
+               if (status)
+                       goto corrupted;
+       }
 
        GEM_BUG_ON(head > size);