drm/amdgpu: Fix hang on device removal.
author Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Wed, 12 May 2021 14:26:44 +0000 (10:26 -0400)
committer Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Thu, 20 May 2021 03:50:28 +0000 (23:50 -0400)
If the device is removed while commands are still in flight, you cannot
wait to flush the HW fences on a ring, since the device is gone.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210512142648.666476-13-andrey.grodzovsky@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

index 1ffb36b..fa03702 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/firmware.h>
 #include <linux/pm_runtime.h>
 
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -525,8 +526,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
  */
 void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
 {
-       unsigned i, j;
-       int r;
+       int i, r;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                struct amdgpu_ring *ring = adev->rings[i];
@@ -535,11 +535,15 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
                        continue;
                if (!ring->no_scheduler)
                        drm_sched_fini(&ring->sched);
-               r = amdgpu_fence_wait_empty(ring);
-               if (r) {
-                       /* no need to trigger GPU reset as we are unloading */
+               /* You can't wait for HW to signal if it's gone */
+               if (!drm_dev_is_unplugged(&adev->ddev))
+                       r = amdgpu_fence_wait_empty(ring);
+               else
+                       r = -ENODEV;
+               /* no need to trigger GPU reset as we are unloading */
+               if (r)
                        amdgpu_fence_driver_force_completion(ring);
-               }
+
                if (ring->fence_drv.irq_src)
                        amdgpu_irq_put(adev, ring->fence_drv.irq_src,
                                       ring->fence_drv.irq_type);