From fa4a427d84f9b797970a3d5139d7645403e4e989 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 13 Dec 2021 21:38:20 +0000 Subject: [PATCH] drm/amdgpu: SRIOV flr_work should use down_write Host initiated VF FLR may fail if someone else is already holding a read_lock. Change from down_write_trylock to down_write to guarantee the reset goes through. Signed-off-by: Victor Skvortsov Reviewed by: Shaoyun.liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 23b066b..0077e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -252,11 +252,12 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_write_trylock(&adev->reset_sem)) + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return; + down_write(&adev->reset_sem); + amdgpu_virt_fini_data_exchange(adev); - atomic_set(&adev->in_gpu_reset, 1); xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index a35e6d8..477d0dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -281,11 +281,12 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_write_trylock(&adev->reset_sem)) + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return; + down_write(&adev->reset_sem); + amdgpu_virt_fini_data_exchange(adev); - atomic_set(&adev->in_gpu_reset, 1); xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); -- 2.7.4