From 1f8d3ad2a01957ddb7c8198ee293e0feaa03ea18 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 16 Apr 2021 17:30:12 +0800 Subject: [PATCH] drm/amdgpu: only harvest gcea/mmea error status in aldebaran In aldebaran, driver only needs to harvest SDP RdRspStatus, WrRspStatus and first parity error on RdRsp data. Check error type before harvest error information. Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 21 ++++++++++++--------- drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 11 +++++++---- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 0070c09..44d37e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = { REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, }; -static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs = +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, @@ -1041,11 +1041,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev) uint32_t i, j; mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; + for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; j++) { gfx_v9_4_2_select_se_sh(adev, i, 0, j); - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); } } gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -1090,17 +1090,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; + for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; j++) { gfx_v9_4_2_select_se_sh(adev, i, 0, j); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( - gfx_v9_4_2_rdrsp_status_regs)); - if ((reg_value & 0xFFF) != GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK) + gfx_v9_4_2_ea_err_status_regs)); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n", j, reg_value); + } /* clear after read */ - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f9dc135..1c999c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -1286,7 +1286,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev) } } -static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = { +static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = { { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 }, { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 }, @@ -1303,12 +1303,15 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) return; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) { + for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) { reg_value = - RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs[i])); - if ((reg_value & 0xFFF) != MMEA0_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK) + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i])); + if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n", i, reg_value); + } } } -- 2.7.4