From: Andrey Grodzovsky Date: Tue, 25 Jan 2022 16:32:47 +0000 (-0500) Subject: drm/amdgpu: Rework amdgpu_device_lock_adev X-Git-Tag: v6.6.17~3937^2~23^2~3080 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e923be9934a9c54a94e443f9e77bda5b9fbd1ce5;p=platform%2Fkernel%2Flinux-rpi.git drm/amdgpu: Rework amdgpu_device_lock_adev This function needs to be split into 2 parts, where one is called only once for locking a single instance of reset_domain's sem and reset flag, and the other part, which handles MP1 states, should still be called for each device in the XGMI hive. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Link: https://www.spinics.net/lists/amd-gfx/msg74118.html --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e05d7cb..f69ab22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4825,16 +4825,8 @@ end: return r; } -static void amdgpu_device_lock_adev(struct amdgpu_device *adev, - struct amdgpu_hive_info *hive) +static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) { - atomic_set(&adev->reset_domain->in_gpu_reset, 1); - - if (hive) { - down_write_nest_lock(&adev->reset_domain->sem, &hive->hive_lock); - } else { - down_write(&adev->reset_domain->sem); - } switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: @@ -4849,12 +4841,10 @@ static void amdgpu_device_lock_adev(struct amdgpu_device *adev, } } -static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) +static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev) { amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; - atomic_set(&adev->reset_domain->in_gpu_reset, 0); - up_write(&adev->reset_domain->sem); } static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) @@ -5060,10 +5050,15 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, device_list_handle = &device_list; } + /* 
We need to lock reset domain only once both for XGMI and single device */ + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + amdgpu_device_lock_reset_domain(tmp_adev->reset_domain, hive); + /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - amdgpu_device_lock_adev(tmp_adev, hive); + amdgpu_device_set_mp1_state(tmp_adev); /* * Try to put the audio codec into suspend state @@ -5213,9 +5208,14 @@ skip_sched_resume: if (audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); - amdgpu_device_unlock_adev(tmp_adev); + + amdgpu_device_unset_mp1_state(tmp_adev); } + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); + if (hive) { mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); @@ -5477,7 +5477,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta * Locking adev->reset_domain->sem will prevent any external access * to GPU during PCI error recovery */ - amdgpu_device_lock_adev(adev, NULL); + amdgpu_device_lock_reset_domain(adev->reset_domain, NULL); + amdgpu_device_set_mp1_state(adev); /* * Block any work scheduling as we do for regular GPU reset @@ -5584,7 +5585,8 @@ out: DRM_INFO("PCIe error recovery succeeded\n"); } else { DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unlock_adev(adev); + amdgpu_device_unset_mp1_state(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); } return r ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -5621,7 +5623,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev) drm_sched_start(&ring->sched, true); } - amdgpu_device_unlock_adev(adev); + amdgpu_device_unset_mp1_state(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 5ab72c3..9b18ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -137,5 +137,24 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d return reset_domain; } +void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain, + struct amdgpu_hive_info *hive) +{ + atomic_set(&reset_domain->in_gpu_reset, 1); + + if (hive) { + down_write_nest_lock(&reset_domain->sem, &hive->hive_lock); + } else { + down_write(&reset_domain->sem); + } +} + + +void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) +{ + atomic_set(&reset_domain->in_gpu_reset, 0); + up_write(&reset_domain->sem); +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index ea6fc98..92de3b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -118,5 +118,9 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma return queue_work(domain->wq, work); } +void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain, + struct amdgpu_hive_info *hive); + +void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); #endif