drm/amdgpu: update gfx 9.4.2 ras error reporting
author John Clements <john.clements@amd.com>
Mon, 12 Apr 2021 08:12:56 +0000 (16:12 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 15 Apr 2021 20:02:47 +0000 (16:02 -0400)
Only output the RAS error status if an error bit or an error counter is set.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c

index 9ca76a3..0070c09 100644 (file)
@@ -997,8 +997,9 @@ static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev,
                               blk->clear);
 
                        /* print the edc count */
-                       gfx_v9_4_2_log_utc_edc_count(adev, blk, j, sec_cnt,
-                                                    ded_cnt);
+                       if (sec_cnt || ded_cnt)
+                               gfx_v9_4_2_log_utc_edc_count(adev, blk, j, sec_cnt,
+                                                            ded_cnt);
                }
        }
 
@@ -1095,7 +1096,7 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
                        gfx_v9_4_2_select_se_sh(adev, i, 0, j);
                        reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
                                gfx_v9_4_2_rdrsp_status_regs));
-                       if (reg_value)
+                       if ((reg_value & 0xFFF) != GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK)
                                dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
                                                j, reg_value);
                        /* clear after read */
@@ -1112,19 +1113,19 @@ static void gfx_v9_4_2_query_utc_err_status(struct amdgpu_device *adev)
        uint32_t data;
 
        data = RREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS);
-       if (!data) {
+       if (data) {
                dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
                WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3);
        }
 
        data = RREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS);
-       if (!data) {
+       if (data) {
                dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
                WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3);
        }
 
        data = RREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS);
-       if (!data) {
+       if (data) {
                dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
                WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3);
        }