From 2cf49e00d40d5132e3d067b5aa6d84791929ab15 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Sun, 14 Nov 2021 12:38:18 -0500 Subject: [PATCH] drm/amd/amdkfd: Fix kernel panic when reset failed and been triggered again In SRIOV configuration, the reset may failed to bring asic back to normal but stop cpsch already been called, the start_cpsch will not be called since there is no resume in this case. When reset been triggered again, driver should avoid to do uninitialization again. Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 003ba6a..93e33dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1226,6 +1226,11 @@ static int stop_cpsch(struct device_queue_manager *dqm) bool hanging; dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + if (!dqm->is_hws_hang) unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); hanging = dqm->is_hws_hang || dqm->is_resetting; -- 2.7.4