#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
/* Common functions */
+bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job);
}
/**
+ * amdgpu_device_has_job_running - check if there is any job in mirror list
+ *
+ * @adev: amdgpu device pointer
+ *
+ * check if there is any job in mirror list
+ */
+bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
+{
+ int i;
+ struct drm_sched_job *job;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ spin_lock(&ring->sched.job_list_lock);
+ job = list_first_entry_or_null(&ring->sched.ring_mirror_list,
+ struct drm_sched_job, node);
+ spin_unlock(&ring->sched.job_list_lock);
+ if (job)
+ return true;
+ }
+ return false;
+}
+
+/**
* amdgpu_device_should_recover_gpu - check if we should try GPU recovery
*
* @adev: amdgpu device pointer
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
- && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
+ && (amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover(adev, NULL);
}
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
- && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+ && (amdgpu_device_has_job_running(adev) ||
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT))