From 8c0bba64795fdfa89299aa160887ef3ff85c77d2 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 21 May 2020 12:36:44 +0800 Subject: [PATCH] drm/amd/powerplay: implement ASIC specific thermal throttling logging Enable this for Arcturus only for now. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 44 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/powerplay/smu_internal.h | 2 ++ 3 files changed, 47 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 67980f5..5c1b2d7 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -2329,6 +2329,49 @@ static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en) NULL); } +static const struct throttling_logging_label { + uint32_t feature_mask; + const char *label; +} logging_label[] = { + {(1U << THROTTLER_TEMP_HOTSPOT_BIT), "GPU"}, + {(1U << THROTTLER_TEMP_MEM_BIT), "HBM"}, + {(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"}, + {(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"}, + {(1U << THROTTLER_TEMP_VR_SOC_BIT), "VR of SOC rail"}, + {(1U << THROTTLER_VRHOT0_BIT), "VR0 HOT"}, + {(1U << THROTTLER_VRHOT1_BIT), "VR1 HOT"}, +}; +static void arcturus_log_thermal_throttling_event(struct smu_context *smu) +{ + int throttler_idx, throtting_events = 0, buf_idx = 0; + struct amdgpu_device *adev = smu->adev; + SmuMetrics_t metrics; + char log_buf[256]; + + arcturus_get_metrics_table(smu, &metrics); + + memset(log_buf, 0, sizeof(log_buf)); + for (throttler_idx = 0; throttler_idx < ARRAY_SIZE(logging_label); + throttler_idx++) { + if (metrics.ThrottlerStatus & logging_label[throttler_idx].feature_mask) { + throtting_events++; + buf_idx += snprintf(log_buf + buf_idx, + sizeof(log_buf) - buf_idx, + "%s%s", + throtting_events > 1 ? " and " : "", + logging_label[throttler_idx].label); + if (buf_idx >= sizeof(log_buf)) { + pr_err("buffer overflow!\n"); + log_buf[sizeof(log_buf) - 1] = '\0'; + break; + } + } + } + + dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", + log_buf); +} + static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2423,6 +2466,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_pptable_power_limit = arcturus_get_pptable_power_limit, .set_df_cstate = arcturus_set_df_cstate, .allow_xgmi_power_down = arcturus_allow_xgmi_power_down, + .log_thermal_throttling_event = arcturus_log_thermal_throttling_event, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index bac7e13..1223d298 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -575,6 +575,7 @@ struct pptable_funcs { uint32_t (*get_pptable_power_limit)(struct smu_context *smu); int (*disable_umc_cdr_12gbps_workaround)(struct smu_context *smu); int (*set_power_source)(struct smu_context *smu, enum smu_power_src_type power_src); + void (*log_thermal_throttling_event)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index c974448..6b627d6 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -219,4 +219,6 @@ static inline int smu_send_smc_msg(struct smu_context *smu, enum smu_message_typ #define smu_i2c_eeprom_fini(smu, control) \ ((smu)->ppt_funcs->i2c_eeprom_fini ? (smu)->ppt_funcs->i2c_eeprom_fini((control)) : 0) +#define smu_log_thermal_throttling(smu) \ + ((smu)->ppt_funcs->log_thermal_throttling_event ? (smu)->ppt_funcs->log_thermal_throttling_event((smu)) : 0) #endif -- 2.7.4