Revert "drm/amdgpu: let mode2 reset fallback to default when failure"
author Victor Zhao <Victor.Zhao@amd.com>
Thu, 13 Oct 2022 03:06:33 +0000 (11:06 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 17 Oct 2022 21:41:20 +0000 (17:41 -0400)
This reverts commit dac6b80818ac2353631c5a33d140d8d5508e2957.

This commit reverts the AMDGPU_SKIP_MODE2_RESET flag, as it conflicts with
the original design of the reset handler. It will be redesigned.

Fixes: dac6b80818ac23 ("drm/amdgpu: let mode2 reset fallback to default when failure")
Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c

index 03bbfaa51cbcb85cb4e85d9516f7432e89c673ae..0561812aa0a43ed07362c4627098673c4f623c6b 100644 (file)
@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
        reset_context.method = AMD_RESET_METHOD_NONE;
        reset_context.reset_req_dev = adev;
        clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-       clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
        amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
index ab8f970b284918b408f37ca79b493bc9384f8a9c..bb73fb420ffcd3acb21d2a5329f3d1a06520240a 100644 (file)
@@ -5210,7 +5210,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
        reset_context->job = job;
        reset_context->hive = hive;
-
        /*
         * Build list of devices to reset.
         * In case we are in XGMI hive mode, resort the device list
@@ -5337,11 +5336,8 @@ retry:   /* Rest of adevs pre asic reset from XGMI hive. */
                        amdgpu_ras_resume(adev);
        } else {
                r = amdgpu_do_asic_reset(device_list_handle, reset_context);
-               if (r && r == -EAGAIN) {
-                       set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
-                       adev->asic_reset_res = 0;
+               if (r && r == -EAGAIN)
                        goto retry;
-               }
 
                if (!r && gpu_reset_for_dev_remove)
                        goto recover_end;
@@ -5777,7 +5773,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
        reset_context.reset_req_dev = adev;
        set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
        set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
-       set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
        adev->no_hw_access = true;
        r = amdgpu_device_pre_asic_reset(adev, &reset_context);
index 46c99331d7f126a98d31631f1f03fac89c89d048..cd968e781077ecef807e652e498e46f88ee80717 100644 (file)
@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
                if (r)
index 2dad7aa9a03b94737dfc4948bc82b5eaac3991b1..75f1402101f4cd8ea614527bdbd24f8698722c21 100644 (file)
@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
        }
index 831fb222139c6c232e4b9074d9e2047d80d18d69..f778466bb9dbdf3311ebdf053aada97d94531f6d 100644 (file)
@@ -74,9 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
 {
        struct amdgpu_reset_handler *reset_handler = NULL;
 
-       if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
-               return -ENOSYS;
-
        if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
                reset_handler = adev->reset_cntl->get_reset_handler(
                        adev->reset_cntl, reset_context);
@@ -93,9 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
        int ret;
        struct amdgpu_reset_handler *reset_handler = NULL;
 
-       if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
-               return -ENOSYS;
-
        if (adev->reset_cntl)
                reset_handler = adev->reset_cntl->get_reset_handler(
                        adev->reset_cntl, reset_context);
index f5318fedf2f0460fd287b837a8b2d4a209f0248c..f4a501ff87d906ef415207f826005ca914c7f60a 100644 (file)
@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {
 
        AMDGPU_NEED_FULL_RESET = 0,
        AMDGPU_SKIP_HW_RESET = 1,
-       AMDGPU_SKIP_MODE2_RESET = 2,
-       AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
+       AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
 };
 
 struct amdgpu_reset_context {
index a2f04b24913299fecee52d837289467f501e357b..12906ba74462fb65669392bc826663e8fbb60d09 100644 (file)
@@ -290,7 +290,6 @@ flr_done:
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(adev, NULL, &reset_context);
        }
index a977f0027928d0cd121cf65bae42051ae6442bc5..e07757eea7adf95bb43b1a330166b8e84a75468b 100644 (file)
@@ -317,7 +317,6 @@ flr_done:
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(adev, NULL, &reset_context);
        }
index fd14fa9b9cd7cb0fd8c85cca16bf14b50dcead20..288c414babdfa740b598ab49142666b2586beca1 100644 (file)
@@ -529,7 +529,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-               clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(adev, NULL, &reset_context);
        }