From 11003c68b158c07b95e4ba3630a86aff9c442ee7 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Fri, 26 Feb 2021 09:17:10 +0800 Subject: [PATCH] drm/amdgpu: remove unnecessary reading for epprom header If the number of badpage records exceed the threshold, driver has updated both epprom header and control->tbl_hdr.header before gpu reset, therefore GPU recovery thread no need to read epprom header directly. v2: merge amdgpu_ras_check_err_threshold into amdgpu_ras_eeprom_check_err_threshold Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 38 ++++---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 4 +-- 5 files changed, 8 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a0af650..c024069 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4399,7 +4399,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, * bad_page_threshold value to fix this once * probing driver again. */ - if (!amdgpu_ras_check_err_threshold(tmp_adev)) { + if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { /* must succeed. */ amdgpu_ras_resume(tmp_adev); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 09546de..c669435 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2189,19 +2189,3 @@ bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) return false; } - -bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - bool exc_err_limit = false; - - if (con && (amdgpu_bad_page_threshold != 0)) - amdgpu_ras_eeprom_check_err_threshold(&con->eeprom_control, - &exc_err_limit); - - /* - * We are only interested in variable exc_err_limit, - * as it says if GPU is in bad state or not. - */ - return exc_err_limit; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index aed0716..42aab9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -491,8 +491,6 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev); unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); -bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev); - /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, struct eeprom_table_record *bps, int pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 19d9aa7..7f527f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -434,47 +434,21 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) return curr_address; } -int amdgpu_ras_eeprom_check_err_threshold( - struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit) +bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) { - struct amdgpu_device *adev = to_amdgpu_device(control); - unsigned char buff[EEPROM_ADDRESS_SIZE + - EEPROM_TABLE_HEADER_SIZE] = { 0 }; - struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - struct i2c_msg msg = { - .addr = control->i2c_address, - .flags = I2C_M_RD, - .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, - .buf = buff, - }; - int ret; - - *exceed_err_limit = false; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); if (!__is_ras_eeprom_supported(adev)) - return 0; - - /* read EEPROM table header */ - mutex_lock(&control->tbl_mutex); - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); - if (ret < 1) { - dev_err(adev->dev, "Failed to read EEPROM table header.\n"); - goto err; - } - - __decode_table_header_from_buff(hdr, &buff[2]); + return false; - if (hdr->header == EEPROM_TABLE_HDR_BAD) { + if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) { dev_warn(adev->dev, "This GPU is in BAD status."); dev_warn(adev->dev, "Please retire it or setting one bigger " "threshold value when reloading driver.\n"); - *exceed_err_limit = true; + return true; } -err: - mutex_unlock(&control->tbl_mutex); - return 0; + return false; } int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index c7a5e5c..1787211 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -80,9 +80,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, bool *exceed_err_limit); int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); -int amdgpu_ras_eeprom_check_err_threshold( - struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit); +bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev); int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, -- 2.7.4