drm/amdgpu: enable ras error count query and reset for HDP
author Hawking Zhang <Hawking.Zhang@amd.com>
Wed, 28 Apr 2021 14:51:22 +0000 (22:51 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 10 May 2021 22:06:43 +0000 (18:06 -0400)
Add HDP block RAS error query and reset support to the
amdgpu RAS error count query and reset interfaces.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c

index ae9fb20..984e827 100644 (file)
@@ -890,6 +890,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                    adev->gmc.xgmi.ras_funcs->query_ras_error_count)
                        adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
                break;
+       case AMDGPU_RAS_BLOCK__HDP:
+               if (adev->hdp.ras_funcs &&
+                   adev->hdp.ras_funcs->query_ras_error_count)
+                       adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+               break;
        default:
                break;
        }
@@ -967,6 +972,11 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
                if (adev->sdma.funcs->reset_ras_error_count)
                        adev->sdma.funcs->reset_ras_error_count(adev);
                break;
+       case AMDGPU_RAS_BLOCK__HDP:
+               if (adev->hdp.ras_funcs &&
+                   adev->hdp.ras_funcs->reset_ras_error_count)
+                       adev->hdp.ras_funcs->reset_ras_error_count(adev);
+               break;
        default:
                break;
        }
index b53aa4d..6028b55 100644 (file)
@@ -1272,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle)
            adev->mmhub.ras_funcs->reset_ras_error_count)
                adev->mmhub.ras_funcs->reset_ras_error_count(adev);
 
+       if (adev->hdp.ras_funcs &&
+           adev->hdp.ras_funcs->reset_ras_error_count)
+               adev->hdp.ras_funcs->reset_ras_error_count(adev);
+
        r = amdgpu_gmc_ras_late_init(adev);
        if (r)
                return r;
index d80e12b..28e9f6b 100644 (file)
@@ -1521,9 +1521,6 @@ static int soc15_common_late_init(void *handle)
        if (amdgpu_sriov_vf(adev))
                xgpu_ai_mailbox_get_irq(adev);
 
-       if (adev->hdp.funcs->reset_ras_error_count)
-               adev->hdp.funcs->reset_ras_error_count(adev);
-
        if (adev->nbio.ras_funcs &&
            adev->nbio.ras_funcs->ras_late_init)
                r = adev->nbio.ras_funcs->ras_late_init(adev);