drm/amd/pm: add send bad channel info function
authorStanley.Yang <Stanley.Yang@amd.com>
Thu, 3 Mar 2022 09:51:24 +0000 (17:51 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 15 Mar 2022 18:25:16 +0000 (14:25 -0400)
Support messaging the SMU to update the HBM bad channel info in the
OOB table.

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/pm/amdgpu_dpm.c
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c

index 1d63f1e..9a892d6 100644 (file)
@@ -507,6 +507,18 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size)
        return ret;
 }
 
+/*
+ * Pass @size on to smu_send_hbm_bad_channel_flag() for the SMU context
+ * stored in adev->powerplay.pp_handle. The call is made with
+ * adev->pm.mutex held, and its return value is handed back unchanged.
+ */
+int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       int status;
+
+       mutex_lock(&adev->pm.mutex);
+       status = smu_send_hbm_bad_channel_flag(smu_ctx, size);
+       mutex_unlock(&adev->pm.mutex);
+
+       return status;
+}
+
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
                                  enum pp_clock_type type,
                                  uint32_t *min,
index ddfa55b..3e78b30 100644 (file)
@@ -412,6 +412,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
+int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
                                       enum pp_clock_type type,
                                       uint32_t *min,
index 7e79a67..f154475 100644 (file)
@@ -3052,3 +3052,13 @@ int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size)
 
        return ret;
 }
+
+/*
+ * Invoke the ASIC-specific send_hbm_bad_channel_flag callback with @size.
+ * Returns 0 when no ppt_funcs table or no such callback is installed,
+ * otherwise the callback's return value.
+ */
+int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)
+{
+       if (!smu->ppt_funcs || !smu->ppt_funcs->send_hbm_bad_channel_flag)
+               return 0;
+
+       return smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size);
+}
index fbef3ab..ef57b60 100644 (file)
@@ -1292,6 +1292,12 @@ struct pptable_funcs {
         * @set_config_table: Apply the input DriverSmuConfig table settings.
         */
        int (*set_config_table)(struct smu_context *smu, struct config_table_setting *table);
+
+       /**
+        * @send_hbm_bad_channel_flag: message SMU to update bad channel info
+        *                             of SMUBUS table.
+        */
+       int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size);
 };
 
 typedef enum {
@@ -1428,5 +1434,6 @@ int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
 int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
 void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
+int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
 #endif
 #endif
index ab66a4b..0f498ba 100644 (file)
 #define PPSMC_MSG_GfxDriverResetRecovery       0x42
 #define PPSMC_MSG_BoardPowerCalibration        0x43
 #define PPSMC_MSG_HeavySBR                      0x45
-#define PPSMC_Message_Count                    0x46
+#define PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel 0x46
+#define PPSMC_Message_Count                    0x47
 
 
 //PPSMC Reset Types
index d787c3b..9f6f306 100644 (file)
        __SMU_DUMMY_MAP(ForceGfxVid),             \
        __SMU_DUMMY_MAP(Spare0),                  \
        __SMU_DUMMY_MAP(UnforceGfxVid),           \
-       __SMU_DUMMY_MAP(HeavySBR),
+       __SMU_DUMMY_MAP(HeavySBR),                      \
+       __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index 890acc4..cd81f84 100644 (file)
  */
 #define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00
 
+/*
+ * SMU supports the BAD CHANNEL info MSG since version 68.51.00,
+ * use this to check whether the bad channel info message is supported
+ */
+#define SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION 0x00443300
+
 static const struct smu_temperature_range smu13_thermal_policy[] =
 {
        {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
@@ -140,6 +146,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
        MSG_MAP(GfxDriverResetRecovery,              PPSMC_MSG_GfxDriverResetRecovery,          0),
        MSG_MAP(BoardPowerCalibration,               PPSMC_MSG_BoardPowerCalibration,           0),
        MSG_MAP(HeavySBR,                            PPSMC_MSG_HeavySBR,                        0),
+       MSG_MAP(SetBadHBMPagesRetiredFlagsPerChannel,   PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel, 0),
 };
 
 static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = {
@@ -1997,6 +2004,41 @@ static int aldebaran_smu_send_hbm_bad_page_num(struct smu_context *smu,
        return ret;
 }
 
+/*
+ * Check whether the running SMU firmware is new enough for the bad
+ * channel info message.
+ *
+ * Returns 0 when the reported SMU version is at least
+ * SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION, and -EOPNOTSUPP when it is older
+ * or when the version query itself fails.
+ */
+static int aldebaran_check_bad_channel_info_support(struct smu_context *smu)
+{
+       uint32_t intf_ver = 0xff, fw_ver = 0xff;
+
+       /* a failed version query is reported as "not supported" */
+       if (smu_cmn_get_smc_version(smu, &intf_ver, &fw_ver))
+               return -EOPNOTSUPP;
+
+       if (fw_ver < SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION)
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+/*
+ * Send SMU_MSG_SetBadHBMPagesRetiredFlagsPerChannel with @size as the
+ * message parameter, after confirming the firmware supports it.
+ *
+ * Returns the support-check error if the message is unsupported,
+ * otherwise the result of sending the message (logged on failure).
+ */
+static int aldebaran_send_hbm_bad_channel_flag(struct smu_context *smu,
+               uint32_t size)
+{
+       int err = aldebaran_check_bad_channel_info_support(smu);
+
+       if (err)
+               return err;
+
+       /* message SMU to update the bad channel info on SMUBUS */
+       err = smu_cmn_send_smc_msg_with_param(smu,
+                       SMU_MSG_SetBadHBMPagesRetiredFlagsPerChannel, size, NULL);
+       if (!err)
+               return 0;
+
+       dev_err(smu->adev->dev, "[%s] failed to message SMU to update HBM bad channel info\n",
+                       __func__);
+       return err;
+}
+
 static const struct pptable_funcs aldebaran_ppt_funcs = {
        /* init dpm */
        .get_allowed_feature_mask = aldebaran_get_allowed_feature_mask,
@@ -2062,6 +2104,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
        .i2c_fini = aldebaran_i2c_control_fini,
        .send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num,
        .get_ecc_info = aldebaran_get_ecc_info,
+       .send_hbm_bad_channel_flag = aldebaran_send_hbm_bad_channel_flag,
 };
 
 void aldebaran_set_ppt_funcs(struct smu_context *smu)