drm/amd/pm: Enable ecc_info table support for smu v13_0_10
authorCandice Li <candice.li@amd.com>
Mon, 16 Jan 2023 08:23:21 +0000 (16:23 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 6 Mar 2023 20:14:47 +0000 (15:14 -0500)
Support EccInfoTable which includes umc ras error count and
error address.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Stanley.Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c

index 923a9fb..27448ff 100644 (file)
@@ -46,6 +46,7 @@
 #include "asic_reg/mp/mp_13_0_0_sh_mask.h"
 #include "smu_cmn.h"
 #include "amdgpu_ras.h"
+#include "umc_v8_10.h"
 
 /*
  * DO NOT use these for err/warn/info/debug messages.
 
 #define DEBUGSMC_MSG_Mode1Reset        2
 
+/*
+ * SMU_v13_0_10 supports ECCTABLE since version 80.34.0,
+ * use this to check ECCTABLE feature whether support
+ */
+#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200
+
 static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
        MSG_MAP(TestMessage,                    PPSMC_MSG_TestMessage,                 1),
        MSG_MAP(GetSmuVersion,                  PPSMC_MSG_GetSmuVersion,               1),
@@ -229,6 +236,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
        TAB_MAP(ACTIVITY_MONITOR_COEFF),
        [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
        TAB_MAP(I2C_COMMANDS),
+       TAB_MAP(ECCINFO),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
                       AMDGPU_GEM_DOMAIN_VRAM);
        SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
                        PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+       SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
+                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
        smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
        if (!smu_table->metrics_table)
@@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
        if (!smu_table->watermarks_table)
                goto err2_out;
 
+       smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL);
+       if (!smu_table->ecc_table)
+               goto err3_out;
+
        return 0;
 
+err3_out:
+       kfree(smu_table->watermarks_table);
 err2_out:
        kfree(smu_table->gpu_metrics_table);
 err1_out:
@@ -2036,6 +2052,64 @@ static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
        return ret;
 }
 
+static int smu_v13_0_0_check_ecc_table_support(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+       uint32_t if_version = 0xff, smu_version = 0xff;
+       int ret = 0;
+
+       ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
+       if (ret)
+               return -EOPNOTSUPP;
+
+       if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) &&
+               (smu_version >= SUPPORT_ECCTABLE_SMU_13_0_10_VERSION))
+               return ret;
+       else
+               return -EOPNOTSUPP;
+}
+
+static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
+                                                                       void *table)
+{
+       struct smu_table_context *smu_table = &smu->smu_table;
+       struct amdgpu_device *adev = smu->adev;
+       EccInfoTable_t *ecc_table = NULL;
+       struct ecc_info_per_ch *ecc_info_per_channel = NULL;
+       int i, ret = 0;
+       struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
+
+       ret = smu_v13_0_0_check_ecc_table_support(smu);
+       if (ret)
+               return ret;
+
+       ret = smu_cmn_update_table(smu,
+                                       SMU_TABLE_ECCINFO,
+                                       0,
+                                       smu_table->ecc_table,
+                                       false);
+       if (ret) {
+               dev_info(adev->dev, "Failed to export SMU ecc table!\n");
+               return ret;
+       }
+
+       ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
+
+       for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
+               ecc_info_per_channel = &(eccinfo->ecc[i]);
+               ecc_info_per_channel->ce_count_lo_chip =
+                               ecc_table->EccInfo[i].ce_count_lo_chip;
+               ecc_info_per_channel->ce_count_hi_chip =
+                               ecc_table->EccInfo[i].ce_count_hi_chip;
+               ecc_info_per_channel->mca_umc_status =
+                               ecc_table->EccInfo[i].mca_umc_status;
+               ecc_info_per_channel->mca_umc_addr =
+                               ecc_table->EccInfo[i].mca_umc_addr;
+       }
+
+       return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
        .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -2111,6 +2185,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num,
        .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
        .gpo_control = smu_v13_0_gpo_control,
+       .get_ecc_info = smu_v13_0_0_get_ecc_info,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)