drm/amdgpu: provide socket/die id info in RAS message
author Hawking Zhang <Hawking.Zhang@amd.com>
Sun, 25 Apr 2021 06:34:25 +0000 (14:34 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 29 Apr 2021 03:35:50 +0000 (23:35 -0400)
Add socket/die information to RAS messages on platforms
that support querying this information.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index f62873fbf2495089d8f926be7b8a8a052b89cdee..ae9fb202525922c0480780acdace45d653ef00d6 100644 (file)
@@ -901,17 +901,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
        info->ce_count = obj->err_data.ce_count;
 
        if (err_data.ce_count) {
-               dev_info(adev->dev, "%ld correctable hardware errors "
+               if (adev->smuio.funcs &&
+                   adev->smuio.funcs->get_socket_id &&
+                   adev->smuio.funcs->get_die_id) {
+                       dev_info(adev->dev, "socket: %d, die: %d "
+                                       "%ld correctable hardware errors "
                                        "detected in %s block, no user "
                                        "action is needed.\n",
+                                       adev->smuio.funcs->get_socket_id(adev),
+                                       adev->smuio.funcs->get_die_id(adev),
                                        obj->err_data.ce_count,
                                        ras_block_str(info->head.block));
+               } else {
+                       dev_info(adev->dev, "%ld correctable hardware errors "
+                                       "detected in %s block, no user "
+                                       "action is needed.\n",
+                                       obj->err_data.ce_count,
+                                       ras_block_str(info->head.block));
+               }
        }
        if (err_data.ue_count) {
-               dev_info(adev->dev, "%ld uncorrectable hardware errors "
+               if (adev->smuio.funcs &&
+                   adev->smuio.funcs->get_socket_id &&
+                   adev->smuio.funcs->get_die_id) {
+                       dev_info(adev->dev, "socket: %d, die: %d "
+                                       "%ld uncorrectable hardware errors "
                                        "detected in %s block\n",
+                                       adev->smuio.funcs->get_socket_id(adev),
+                                       adev->smuio.funcs->get_die_id(adev),
                                        obj->err_data.ue_count,
                                        ras_block_str(info->head.block));
+               } else {
+                       dev_info(adev->dev, "%ld uncorrectable hardware errors "
+                                       "detected in %s block\n",
+                                       obj->err_data.ue_count,
+                                       ras_block_str(info->head.block));
+               }
        }
 
        return 0;