drm/etnaviv: print offender task information on hangcheck recovery
author Christian Gmeiner <christian.gmeiner@gmail.com>
Fri, 3 Jun 2022 12:37:05 +0000 (14:37 +0200)
committer Jaehoon Chung <jh80.chung@samsung.com>
Wed, 13 Mar 2024 06:58:55 +0000 (15:58 +0900)
Track the pid per submit, so we can print the name and cmdline of
the task which submitted the batch that caused the gpu to hang.

Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
(cherry picked from commit c71d19490195b04221d74883a913360f1d2c70de)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.h
drivers/gpu/drm/etnaviv/etnaviv_sched.c

index 98e60df882b688bf16edfff1930f26553e353748..6597c7da47f4430cf37f3c94fdfb29345ef70168 100644 (file)
@@ -99,6 +99,7 @@ struct etnaviv_gem_submit {
        int out_fence_id;
        struct list_head node; /* GPU active submit list */
        struct etnaviv_cmdbuf cmdbuf;
+       struct pid *pid;       /* submitting process */
        bool runtime_resumed;
        u32 exec_state;
        u32 flags;
index ddf539f26f2dad3891fac462060b60ea7f7317c7..54924d66ab10b5a0901cdcac6475991e092a9657 100644 (file)
@@ -412,6 +412,9 @@ static void submit_cleanup(struct kref *kref)
                mutex_unlock(&submit->gpu->fence_lock);
                dma_fence_put(submit->out_fence);
        }
+
+       put_pid(submit->pid);
+
        kfree(submit->pmrs);
        kfree(submit);
 }
@@ -435,6 +438,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
        struct sync_file *sync_file = NULL;
        struct ww_acquire_ctx ticket;
        int out_fence_fd = -1;
+       struct pid *pid = get_pid(task_pid(current));
        void *stream;
        int ret;
 
@@ -532,6 +536,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
                goto err_submit_ww_acquire;
        }
 
+       submit->pid = pid;
+
        ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &submit->cmdbuf,
                                  ALIGN(args->stream_size, 8) + 8);
        if (ret)
index 950cde75449c3e928797848ab332331c68d458a4..8f34e54658a09c6fa1a562f8d491844b2a453389 100644 (file)
@@ -1046,12 +1046,28 @@ pm_put:
 }
 #endif
 
-void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
+void etnaviv_gpu_recover_hang(struct etnaviv_gem_submit *submit)
 {
+       struct etnaviv_gpu *gpu = submit->gpu;
+       char *comm = NULL, *cmd = NULL;
+       struct task_struct *task;
        unsigned int i = 0;
 
        dev_err(gpu->dev, "recover hung GPU!\n");
 
+       task = get_pid_task(submit->pid, PIDTYPE_PID);
+       if (task) {
+               comm = kstrdup(task->comm, GFP_KERNEL);
+               cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+               put_task_struct(task);
+       }
+
+       if (comm && cmd)
+               dev_err(gpu->dev, "offending task: %s (%s)\n", comm, cmd);
+
+       kfree(cmd);
+       kfree(comm);
+
        if (pm_runtime_get_sync(gpu->dev) < 0)
                goto pm_put;
 
index 85eddd492774d59c34189f6768e24e3f42dd8766..b3a0941d56fd37efef360f6fc8919d6d3a85cf4e 100644 (file)
@@ -168,7 +168,7 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 #endif
 
-void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu);
+void etnaviv_gpu_recover_hang(struct etnaviv_gem_submit *submit);
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
        u32 fence, struct drm_etnaviv_timespec *timeout);
index 026b6c0731198093cb33b444aa85c688fafcc949..f54d98c2d7bb8ff07ace1d01619d72dfa52ac451 100644 (file)
@@ -118,7 +118,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 
        /* get the GPU back into the init state */
        etnaviv_core_dump(submit);
-       etnaviv_gpu_recover_hang(gpu);
+       etnaviv_gpu_recover_hang(submit);
 
        drm_sched_resubmit_jobs(&gpu->sched);