drm/amdgpu: query umc error count
author Hawking Zhang <Hawking.Zhang@amd.com>
Wed, 17 Jul 2019 13:49:53 +0000 (21:49 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 31 Jul 2019 19:49:28 +0000 (14:49 -0500)
check umc error count in both ras query function and
ras interrupt handler

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index ff695ce..3d39d62 100644 (file)
@@ -588,11 +588,19 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
                struct ras_query_if *info)
 {
        struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+       struct ras_err_data err_data = {0, 0};
 
        if (!obj)
                return -EINVAL;
-       /* TODO might read the register to read the count */
 
+       switch (info->head.block) {
+       case AMDGPU_RAS_BLOCK__UMC:
+               if (adev->umc_funcs->query_ras_error_count)
+                       adev->umc_funcs->query_ras_error_count(adev, &err_data);
+               break;
+       default:
+               break;
+       }
        info->ue_count = obj->err_data.ue_count;
        info->ce_count = obj->err_data.ce_count;
 
@@ -986,6 +994,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
        struct ras_ih_data *data = &obj->ih_data;
        struct amdgpu_iv_entry entry;
        int ret;
+       struct ras_err_data err_data = {0, 0};
 
        while (data->rptr != data->wptr) {
                rmb();
index 19f3d71..5282c94 100644 (file)
@@ -245,7 +245,10 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
 static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
                struct amdgpu_iv_entry *entry)
 {
+       struct ras_err_data err_data = {0, 0};
        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+       if (adev->umc_funcs->query_ras_error_count)
+               adev->umc_funcs->query_ras_error_count(adev, &err_data);
        amdgpu_ras_reset_gpu(adev, 0);
        return AMDGPU_RAS_UE;
 }