drm/amdgpu: only harvest gcea/mmea error status in aldebaran

author Hawking Zhang <Hawking.Zhang@amd.com>

Fri, 16 Apr 2021 09:30:12 +0000 (17:30 +0800)

committer Alex Deucher <alexander.deucher@amd.com>

Wed, 21 Apr 2021 01:35:55 +0000 (21:35 -0400)
author Hawking Zhang <Hawking.Zhang@amd.com>
Fri, 16 Apr 2021 09:30:12 +0000 (17:30 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 21 Apr 2021 01:35:55 +0000 (21:35 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c

index 0070c091b2b7f789d2f679b22c0c45cb92a2491f..44d37e40ab5082f9d0c3e3e4249958b27a47c836 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
           REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
  };
  
-static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs =
+static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
         { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
  
  static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
@@ -1041,11 +1041,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
         uint32_t i, j;
  
         mutex_lock(&adev->grbm_idx_mutex);
-       for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
-               for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
+       for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+               for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
                      j++) {
                         gfx_v9_4_2_select_se_sh(adev, i, 0, j);
-                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
+                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
                 }
         }
         gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
@@ -1090,17 +1090,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
  
         mutex_lock(&adev->grbm_idx_mutex);
  
-       for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
-               for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
+       for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+               for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
                      j++) {
                         gfx_v9_4_2_select_se_sh(adev, i, 0, j);
                         reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
-                               gfx_v9_4_2_rdrsp_status_regs));
-                       if ((reg_value & 0xFFF) != GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK)
+                               gfx_v9_4_2_ea_err_status_regs));
+                       if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+                           REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+                           REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
                                 dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
                                                 j, reg_value);
+                       }
                         /* clear after read */
-                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
+                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
                 }
         }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c

index f9dc13584f6c940d6b17653fa7d3953192d09332..1c999c56d995be1bdd91c199fa19a5e3a9641c12 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -1286,7 +1286,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
         }
  }
  
-static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = {
+static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
         { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
         { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
         { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
@@ -1303,12 +1303,15 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
                 return;
  
-       for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) {
+       for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
                 reg_value =
-                       RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs[i]));
-               if ((reg_value & 0xFFF) != MMEA0_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK)
+                       RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]));
+               if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+                   REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+                   REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
                         dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
                                         i, reg_value);
+               }
         }
  }
author	Hawking Zhang <Hawking.Zhang@amd.com>
	Fri, 16 Apr 2021 09:30:12 +0000 (17:30 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
	Wed, 21 Apr 2021 01:35:55 +0000 (21:35 -0400)
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c		patch | blob | history
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c		patch | blob | history