From 30feef0676092bdb4b8697e68b8d5864d54f096f Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 8 Feb 2023 14:54:01 +0800 Subject: [PATCH] drm/amdgpu: add RAS error count reset for gfx_v9_4_3 Add GFX RAS error count reset function. v2: remove xcp operation. only select_se_sh when instance number is more than 1. v3: add check for se_num before select_se_sh. change instance from 0 to xcc_id for register access. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index bfd041b..ac5270d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3773,6 +3773,39 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, err_data->ue_count += ue_count; } +static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + uint32_t i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 || + gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ce_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + } + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, int xcc_id) { @@ -3882,6 +3915,11 @@ static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev, gfx_v9_4_3_inst_query_ras_err_count); } +static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count); +} + static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); -- 2.7.4