drm/msm: print offender task name on hangcheck recovery
author Rob Clark <robdclark@gmail.com>
Tue, 3 May 2016 14:10:15 +0000 (10:10 -0400)
committer Rob Clark <robdclark@gmail.com>
Sun, 8 May 2016 14:22:18 +0000 (10:22 -0400)
Track the pid per submit, so we can print the name of the task which
submitted the batch that caused the gpu to hang.

Signed-off-by: Rob Clark <robdclark@gmail.com>
drivers/gpu/drm/msm/msm_gem.h
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/msm/msm_gpu.c

index 2771d15..9facd4b 100644 (file)
@@ -86,6 +86,7 @@ struct msm_gem_submit {
        struct list_head bo_list;
        struct ww_acquire_ctx ticket;
        struct fence *fence;
+       struct pid *pid;    /* submitting process */
        bool valid;         /* true if no cmdstream patching needed */
        unsigned int nr_cmds;
        unsigned int nr_bos;
index 711a658..a9a0011 100644 (file)
@@ -45,6 +45,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
        submit->dev = dev;
        submit->gpu = gpu;
+       submit->pid = get_pid(task_pid(current));
 
        /* initially, until copy_from_user() and bo lookup succeeds: */
        submit->nr_bos = 0;
@@ -60,6 +61,7 @@ void msm_gem_submit_free(struct msm_gem_submit *submit)
 {
        fence_put(submit->fence);
        list_del(&submit->node);
+       put_pid(submit->pid);
        kfree(submit);
 }
 
index 5aed93a..36ed53e 100644 (file)
@@ -272,16 +272,30 @@ static void recover_worker(struct work_struct *work)
 {
        struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
        struct drm_device *dev = gpu->dev;
+       struct msm_gem_submit *submit;
        uint32_t fence = gpu->funcs->last_fence(gpu);
 
-       dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
-
        msm_update_fence(gpu->fctx, fence + 1);
 
        mutex_lock(&dev->struct_mutex);
-       if (msm_gpu_active(gpu)) {
-               struct msm_gem_submit *submit;
 
+       dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
+       list_for_each_entry(submit, &gpu->submit_list, node) {
+               if (submit->fence->seqno == (fence + 1)) {
+                       struct task_struct *task;
+
+                       rcu_read_lock();
+                       task = pid_task(submit->pid, PIDTYPE_PID);
+                       if (task) {
+                               dev_err(dev->dev, "%s: offending task: %s\n",
+                                               gpu->name, task->comm);
+                       }
+                       rcu_read_unlock();
+                       break;
+               }
+       }
+
+       if (msm_gpu_active(gpu)) {
                /* retire completed submits, plus the one that hung: */
                retire_submits(gpu);
 
@@ -293,6 +307,7 @@ static void recover_worker(struct work_struct *work)
                        gpu->funcs->submit(gpu, submit, NULL);
                }
        }
+
        mutex_unlock(&dev->struct_mutex);
 
        msm_gpu_retire(gpu);