drm/amdgpu: stop all rings before doing gpu recover
author		Monk Liu <Monk.Liu@amd.com>
		Mon, 25 Dec 2017 07:14:58 +0000 (15:14 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
		Thu, 1 Mar 2018 16:52:23 +0000 (11:52 -0500)
Found that recover_vram_from_shadow sometimes gets executed
in parallel with the SDMA scheduler; stop all the
schedulers before doing a gpu reset/recover.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 4124485..64bd300 100644
@@ -2648,22 +2648,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
        /* block TTM */
        resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
        /* store modesetting */
        if (amdgpu_device_has_dc_support(adev))
                state = drm_atomic_helper_suspend(adev->ddev);
 
-       /* block scheduler */
+       /* block all schedulers and reset the given job's ring */
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
 
                if (!ring || !ring->sched.thread)
                        continue;
 
-               /* only focus on the ring hit timeout if &job not NULL */
+               kthread_park(ring->sched.thread);
+
                if (job && job->ring->idx != i)
                        continue;
 
-               kthread_park(ring->sched.thread);
                drm_sched_hw_job_reset(&ring->sched, &job->base);
 
                /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2706,33 +2707,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                        }
                        dma_fence_put(fence);
                }
+       }
 
-               for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                       struct amdgpu_ring *ring = adev->rings[i];
-
-                       if (!ring || !ring->sched.thread)
-                               continue;
+       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+               struct amdgpu_ring *ring = adev->rings[i];
 
-                       /* only focus on the ring hit timeout if &job not NULL */
-                       if (job && job->ring->idx != i)
-                               continue;
+               if (!ring || !ring->sched.thread)
+                       continue;
 
+               /* only need to recover the scheduler of the given job's
+                * ring, or of all rings when @job is NULL, after the
+                * above amdgpu_reset completed
+                */
+               if ((!job || job->ring->idx == i) && !r)
                        drm_sched_job_recovery(&ring->sched);
-                       kthread_unpark(ring->sched.thread);
-               }
-       } else {
-               for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                       struct amdgpu_ring *ring = adev->rings[i];
 
-                       if (!ring || !ring->sched.thread)
-                               continue;
-
-                       /* only focus on the ring hit timeout if &job not NULL */
-                       if (job && job->ring->idx != i)
-                               continue;
-
-                       kthread_unpark(adev->rings[i]->sched.thread);
-               }
+               kthread_unpark(ring->sched.thread);
        }
 
        if (amdgpu_device_has_dc_support(adev)) {
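
The essence of the change is ordering: every scheduler kthread is parked
before the reset path (and recover_vram_from_shadow) touches the hardware,
and unparked only after recovery finishes. Below is a minimal user-space
sketch of that park-all/recover/unpark pattern, using pthreads in place of
kernel kthreads; every name in it (sched_thread, gpu_recover, NUM_RINGS)
is illustrative and not the driver's API.

/*
 * Sketch of the ordering this patch enforces: park every scheduler
 * first, run recovery with exclusive access, then unpark them all.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define NUM_RINGS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool parked;        /* set while recovery owns the device */
static int  parked_count;  /* schedulers currently sitting parked */
static bool quit;

static void *sched_thread(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (parked) {                   /* analog of kthread_park() */
			parked_count++;
			pthread_cond_broadcast(&cond); /* tell recovery we stopped */
			pthread_cond_wait(&cond, &lock);
			parked_count--;
		}
		bool done = quit;
		pthread_mutex_unlock(&lock);
		if (done)
			break;
		usleep(1000); /* stand-in for pushing jobs to the hw ring */
	}
	return NULL;
}

static void gpu_recover(void)
{
	pthread_mutex_lock(&lock);
	parked = true;
	while (parked_count < NUM_RINGS)   /* wait until EVERY ring is quiet */
		pthread_cond_wait(&cond, &lock);
	/*
	 * Exclusive access: with all rings parked, nothing can race the
	 * reset or the shadow-buffer recovery step.
	 */
	printf("reset + shadow recovery runs with no scheduler live\n");
	parked = false;                    /* analog of kthread_unpark() */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t[NUM_RINGS];
	int i;

	for (i = 0; i < NUM_RINGS; i++)
		pthread_create(&t[i], NULL, sched_thread, NULL);
	usleep(10000);
	gpu_recover();                     /* simulate a timeout-driven reset */
	pthread_mutex_lock(&lock);
	quit = true;
	pthread_mutex_unlock(&lock);
	for (i = 0; i < NUM_RINGS; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The driver gets the same exclusion from kthread_park()/kthread_unpark(),
which do not return until the scheduler kthread has actually parked; the
patch simply hoists the park above the job-specific check so it applies
to every ring, not just the one whose job timed out.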