drm/i915: Use ABI engine class in error state ecode
author Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thu, 5 Nov 2020 11:38:42 +0000 (11:38 +0000)
committer Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Mon, 9 Nov 2020 12:00:22 +0000 (12:00 +0000)
Instead of printing out the internal engine mask, which can change between
kernel versions, making it difficult to map to actual engines, present a
bitmask of the ABI classes of the hanging engines. For example:

  [drm] GPU HANG: ecode 9:8:24dffffd, in gem_exec_schedu [1334]

The engine ABI class is useful for quickly categorizing render vs. media etc.
hangs in bug reports. With virtual engines in mind, it is even more useful
than the current scheme.

v2:
 * Do not re-order fields. (Chris)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20201105113842.1395391-1-tvrtko.ursulin@linux.intel.com
drivers/gpu/drm/i915/i915_gpu_error.c

index 994738d974cce2b4d82ea3594f178ac3bb16eae3..f38e6abd45792eeccc318a732e99cc209b045ccd 100644 (file)
@@ -1659,17 +1659,16 @@ static u32 generate_ecode(const struct intel_engine_coredump *ee)
 static const char *error_msg(struct i915_gpu_coredump *error)
 {
        struct intel_engine_coredump *first = NULL;
+       unsigned int hung_classes = 0;
        struct intel_gt_coredump *gt;
-       intel_engine_mask_t engines;
        int len;
 
-       engines = 0;
        for (gt = error->gt; gt; gt = gt->next) {
                struct intel_engine_coredump *cs;
 
                for (cs = gt->engine; cs; cs = cs->next) {
                        if (cs->hung) {
-                               engines |= cs->engine->mask;
+                               hung_classes |= BIT(cs->engine->uabi_class);
                                if (!first)
                                        first = cs;
                        }
@@ -1678,7 +1677,7 @@ static const char *error_msg(struct i915_gpu_coredump *error)
 
        len = scnprintf(error->error_msg, sizeof(error->error_msg),
                        "GPU HANG: ecode %d:%x:%08x",
-                       INTEL_GEN(error->i915), engines,
+                       INTEL_GEN(error->i915), hung_classes,
                        generate_ecode(first));
        if (first && first->context.pid) {
                /* Just show the first executing process, more is confusing */