drm/i915: Use trace_printk to provide a death rattle for GEM
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 9 Nov 2017 14:30:19 +0000 (14:30 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 9 Nov 2017 21:39:18 +0000 (21:39 +0000)
Trying to enable printk debugging for GEM is fraught with the issue of
spam; interactions with HW are very frequent and often boring. However,
one instance where they are not so boring is just before a BUG; here
ftrace provides a facility to dump its ringbuffer on an oops. So for CI
let's enable trace_printk() to capture the last exchanges with HW as a
death rattle.

For example,
[   79.234110] ------------[ cut here ]------------
[   79.234137] kernel BUG at drivers/gpu/drm/i915/intel_lrc.c:907!
[   79.234145] invalid opcode: 0000 [#1] SMP
[   79.234153] Dumping ftrace buffer:
[   79.234158] ---------------------------------
...
[   79.314044] gem_conc-1059    1..s1 79203443us : intel_lrc_irq_handler: bcs0 out[0]: ctx=5.2, seqno=145
[   79.314089] gem_conc-1059    1..s. 79220800us : intel_lrc_irq_handler: bcs0 csb[1/1]: status=0x00000018:0x00000005
[   79.314133] gem_conc-1059    1..s. 79220803us : intel_lrc_irq_handler: bcs0 out[0]: ctx=5.1, seqno=145
[   79.314177] gem_conc-1062    2..s1 79230458us : intel_lrc_irq_handler: bcs0 in[0]:  ctx=8.1, seqno=146
[   79.314220] gem_conc-1062    2..s1 79230515us : intel_lrc_irq_handler: bcs0 in[0]:  ctx=8.2, seqno=147
[   79.314265] gem_conc-1059    1..s1 79230951us : intel_lrc_irq_handler: bcs0 csb[2/3]: status=0x00000012:0x00000008
[   79.314309] gem_conc-1059    1..s1 79230954us : intel_lrc_irq_handler: bcs0 out[0]: ctx=8.2, seqno=147
[   79.314353] gem_conc-1059    1..s1 79230954us : intel_lrc_irq_handler: bcs0 csb[3/3]: status=0x00008002:0x00000008
[   79.314396] gem_conc-1059    1..s1 79230955us : intel_lrc_irq_handler: bcs0 out[0]: ctx=8.1, seqno=147
[   79.314402] ---------------------------------

v2: Tweak the formatting to be more consistent between in/out.
v3: do {} while (0) stub macro protection

Suggested-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171109143019.16568-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/Kconfig.debug
drivers/gpu/drm/i915/i915_gem.h
drivers/gpu/drm/i915/intel_lrc.c

index 19c77c6..9e53edb 100644 (file)
@@ -28,6 +28,7 @@ config DRM_I915_DEBUG
        select SW_SYNC # signaling validation framework (igt/syncobj*)
        select DRM_I915_SW_FENCE_DEBUG_OBJECTS
        select DRM_I915_SELFTEST
+       select DRM_I915_TRACE_GEM
         default n
         help
           Choose this option to turn on extra driver debugging that may affect
@@ -49,6 +50,19 @@ config DRM_I915_DEBUG_GEM
 
           If in doubt, say "N".
 
+config DRM_I915_TRACE_GEM
+       bool "Insert extra ftrace output from the GEM internals"
+       select TRACING
+       default n
+       help
+         Enable additional and verbose debugging output that will spam
+         ordinary tests, but may be vital for post-mortem debugging when
+         used with /proc/sys/kernel/ftrace_dump_on_oops
+
+         Recommended for driver developers only.
+
+         If in doubt, say "N".
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
         bool "Enable additional driver debugging for fence objects"
         depends on DRM_I915
index ee54597..ff42b5f 100644 (file)
 #define GEM_DEBUG_BUG_ON(expr)
 #endif
 
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
+#define GEM_TRACE(...) trace_printk(__VA_ARGS__)
+#else
+#define GEM_TRACE(...) do { } while (0)
+#endif
+
 #define I915_NUM_ENGINES 5
 
 #endif /* __I915_GEM_H__ */
index 6840ec8..020ca7c 100644 (file)
@@ -466,6 +466,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
                        port_set(&port[n], port_pack(rq, count));
                        desc = execlists_update_context(rq);
                        GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+
+                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x\n",
+                                 engine->name, n,
+                                 rq->ctx->hw_id, count,
+                                 rq->global_seqno);
                } else {
                        GEM_BUG_ON(!n);
                        desc = 0;
@@ -520,6 +525,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
        ce->ring->tail &= (ce->ring->size - 1);
        ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
 
+       GEM_TRACE("\n");
        for (n = execlists_num_ports(&engine->execlists); --n; )
                elsp_write(0, elsp);
 
@@ -832,6 +838,10 @@ static void intel_lrc_irq_handler(unsigned long data)
                        head = execlists->csb_head;
                        tail = READ_ONCE(buf[write_idx]);
                }
+               GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n",
+                         engine->name,
+                         head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))),
+                         tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))));
 
                while (head != tail) {
                        struct drm_i915_gem_request *rq;
@@ -859,6 +869,9 @@ static void intel_lrc_irq_handler(unsigned long data)
                         */
 
                        status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+                       GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n",
+                                 engine->name, head,
+                                 status, buf[2*head + 1]);
                        if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
                                continue;
 
@@ -886,6 +899,10 @@ static void intel_lrc_irq_handler(unsigned long data)
                        GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
                        rq = port_unpack(port, &count);
+                       GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+                                 engine->name,
+                                 rq->ctx->hw_id, count,
+                                 rq->global_seqno);
                        GEM_BUG_ON(count == 0);
                        if (--count == 0) {
                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);