drm/amd/powerplay: add Arcturus support for gpu metrics export
author Evan Quan <evan.quan@amd.com>
Fri, 24 Jul 2020 02:42:39 +0000 (10:42 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 6 Aug 2020 19:44:13 +0000 (15:44 -0400)
Add Arcturus gpu metrics export interface.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
drivers/gpu/drm/amd/powerplay/smu_v11_0.c

index d292898..5aa0d5b 100644 (file)
@@ -79,6 +79,8 @@
 /* possible frequency drift (1Mhz) */
 #define EPSILON                                1
 
+#define smnPCIE_ESM_CTRL                       0x111003D0
+
 static const struct cmn2asic_msg_mapping arcturus_message_map[SMU_MSG_MAX_COUNT] = {
        MSG_MAP(TestMessage,                         PPSMC_MSG_TestMessage,                     0),
        MSG_MAP(GetSmuVersion,                       PPSMC_MSG_GetSmuVersion,                   1),
@@ -234,6 +236,13 @@ static int arcturus_tables_init(struct smu_context *smu)
                return -ENOMEM;
        smu_table->metrics_time = 0;
 
+       smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_0);
+       smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
+       if (!smu_table->gpu_metrics_table) {
+               kfree(smu_table->metrics_table);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
@@ -2242,6 +2251,88 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
        kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
 }
 
+/*
+ * arcturus_get_current_pcie_link_speed - report the current PCIe link speed
+ * @smu: SMU context; smu->adev is the device whose register is read
+ *
+ * Reads smnPCIE_ESM_CTRL. If any of bits [31:15] of that register is set,
+ * the result is the 6-bit field in bits [13:8] plus 128. Otherwise the
+ * value of the generic smu_v11_0_get_current_pcie_link_speed() is returned.
+ *
+ * NOTE(review): the units/encoding of the ESM-derived value are not evident
+ * from this function - confirm against the consumers of pcie_link_speed.
+ */
+static int arcturus_get_current_pcie_link_speed(struct smu_context *smu)
+{
+       /* presumably referenced implicitly by the RREG32_PCIE() macro - verify */
+       struct amdgpu_device *adev = smu->adev;
+       uint32_t esm_ctrl;
+
+       /* TODO: confirm this on real target */
+       esm_ctrl = RREG32_PCIE(smnPCIE_ESM_CTRL);
+       /* 0x1FFFF is a 17-bit mask: after the shift it covers bits [31:15] */
+       if ((esm_ctrl >> 15) & 0x1FFFF)
+               return (((esm_ctrl >> 8) & 0x3F) + 128);
+
+       /* None of those bits set: use the common SMU v11 link speed lookup */
+       return smu_v11_0_get_current_pcie_link_speed(smu);
+}
+
+/*
+ * arcturus_get_gpu_metrics - fill and hand out the gpu_metrics_v1_0 table
+ * @smu: SMU context owning the metrics tables
+ * @table: out parameter; on success set to smu_table->gpu_metrics_table
+ *
+ * Refreshes smu_table->metrics_table through smu_cmn_update_table() while
+ * holding smu->metrics_lock, snapshots it into a local SmuMetrics_t, and
+ * then translates that snapshot into the gpu_metrics_v1_0 layout.
+ *
+ * Return: sizeof(struct gpu_metrics_v1_0) on success, or the non-zero value
+ * returned by smu_cmn_update_table() on failure, in which case *table is
+ * left untouched.
+ */
+static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu,
+                                       void **table)
+{
+       struct smu_table_context *smu_table = &smu->smu_table;
+       struct gpu_metrics_v1_0 *gpu_metrics =
+               (struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table;
+       SmuMetrics_t metrics;
+       int ret = 0;
+
+       /* metrics_lock covers the table refresh and the copy out of it */
+       mutex_lock(&smu->metrics_lock);
+
+       /*
+        * NOTE(review): the meaning of the "0" and "false" arguments is not
+        * visible here - presumably table argument and transfer direction;
+        * confirm against smu_cmn_update_table().
+        */
+       ret = smu_cmn_update_table(smu,
+                                  SMU_TABLE_SMU_METRICS,
+                                  0,
+                                  smu_table->metrics_table,
+                                  false);
+       if (ret) {
+               dev_info(smu->adev->dev, "Failed to export SMU metrics table!\n");
+               mutex_unlock(&smu->metrics_lock);
+               return ret;
+       }
+       /* Record when metrics_table was last refreshed */
+       smu_table->metrics_time = jiffies;
+
+       /* Work from a private snapshot so the lock can be dropped early */
+       memcpy(&metrics, smu_table->metrics_table, sizeof(SmuMetrics_t));
+
+       mutex_unlock(&smu->metrics_lock);
+
+       /*
+        * Reset the output table (0xFF fill, header, timestamp) before
+        * filling in the fields this ASIC provides.
+        * NOTE(review): gpu_metrics_table is written after metrics_lock has
+        * been released - confirm that callers serialize access to it.
+        */
+       smu_v11_0_init_gpu_metrics_v1_0(gpu_metrics);
+
+       /* Temperatures */
+       gpu_metrics->temperature_edge = metrics.TemperatureEdge;
+       gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
+       gpu_metrics->temperature_mem = metrics.TemperatureHBM;
+       gpu_metrics->temperature_vrgfx = metrics.TemperatureVrGfx;
+       gpu_metrics->temperature_vrsoc = metrics.TemperatureVrSoc;
+       gpu_metrics->temperature_vrmem = metrics.TemperatureVrMem;
+
+       /* Utilization */
+       gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
+       gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
+       gpu_metrics->average_mm_activity = metrics.VcnActivityPercentage;
+
+       /* Power and energy */
+       gpu_metrics->average_socket_power = metrics.AverageSocketPower;
+       gpu_metrics->energy_accumulator = metrics.EnergyAccumulator;
+
+       /* Average clocks */
+       gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
+       gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency;
+       gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency;
+       gpu_metrics->average_vclk0_frequency = metrics.AverageVclkFrequency;
+       gpu_metrics->average_dclk0_frequency = metrics.AverageDclkFrequency;
+
+       /* Current clocks, indexed by PPCLK_* in the SMU CurrClock array */
+       gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
+       gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
+       gpu_metrics->current_uclk = metrics.CurrClock[PPCLK_UCLK];
+       gpu_metrics->current_vclk0 = metrics.CurrClock[PPCLK_VCLK];
+       gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
+
+       gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+
+       gpu_metrics->current_fan_speed = metrics.CurrFanSpeed;
+
+       /* PCIe link state comes from helper queries, not the SMU metrics */
+       gpu_metrics->pcie_link_width =
+                       smu_v11_0_get_current_pcie_link_width(smu);
+       gpu_metrics->pcie_link_speed =
+                       arcturus_get_current_pcie_link_speed(smu);
+
+       /* Hand back the table owned by smu_table; nothing is allocated here */
+       *table = (void *)gpu_metrics;
+
+       return sizeof(struct gpu_metrics_v1_0);
+}
+
 static const struct pptable_funcs arcturus_ppt_funcs = {
        /* init dpm */
        .get_allowed_feature_mask = arcturus_get_allowed_feature_mask,
@@ -2319,6 +2410,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
        .log_thermal_throttling_event = arcturus_log_thermal_throttling_event,
        .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
        .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
+       .get_gpu_metrics = arcturus_get_gpu_metrics,
 };
 
 void arcturus_set_ppt_funcs(struct smu_context *smu)
index a08155b..ec2d2aa 100644 (file)
@@ -274,6 +274,9 @@ struct smu_table_context
 
        void                            *overdrive_table;
        void                            *boot_overdrive_table;
+
+       uint32_t                        gpu_metrics_table_size;
+       void                            *gpu_metrics_table;
 };
 
 struct smu_dpm_context {
index aeb1265..f2a5221 100644 (file)
@@ -272,5 +272,7 @@ int smu_v11_0_get_current_pcie_link_speed_level(struct smu_context *smu);
 
 int smu_v11_0_get_current_pcie_link_speed(struct smu_context *smu);
 
+void smu_v11_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics);
+
 #endif
 #endif
index ff90e20..ff574eb 100644 (file)
@@ -417,10 +417,12 @@ int smu_v11_0_fini_smc_tables(struct smu_context *smu)
        struct smu_table_context *smu_table = &smu->smu_table;
        struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
 
+       kfree(smu_table->gpu_metrics_table);
        kfree(smu_table->boot_overdrive_table);
        kfree(smu_table->overdrive_table);
        kfree(smu_table->max_sustainable_clocks);
        kfree(smu_table->driver_pptable);
+       smu_table->gpu_metrics_table = NULL;
        smu_table->boot_overdrive_table = NULL;
        smu_table->overdrive_table = NULL;
        smu_table->max_sustainable_clocks = NULL;
@@ -1971,3 +1973,15 @@ int smu_v11_0_get_current_pcie_link_speed(struct smu_context *smu)
 
        return link_speed[speed_level];
 }
+
+/*
+ * smu_v11_0_init_gpu_metrics_v1_0 - reset a gpu_metrics_v1_0 table
+ * @gpu_metrics: table to initialize; sizeof(struct gpu_metrics_v1_0) bytes
+ *               are written through this pointer
+ *
+ * Fills the whole structure with 0xFF bytes, then writes the common header
+ * (structure size, format revision 1, content revision 0) and stamps
+ * system_clock_counter with ktime_get_boottime_ns().
+ */
+void smu_v11_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics)
+{
+       /*
+        * Any field the caller does not overwrite afterwards keeps all-ones
+        * bytes - presumably the "value not available" marker; confirm
+        * against the gpu_metrics ABI definition.
+        */
+       memset(gpu_metrics, 0xFF, sizeof(struct gpu_metrics_v1_0));
+
+       gpu_metrics->common_header.structure_size =
+                               sizeof(struct gpu_metrics_v1_0);
+       gpu_metrics->common_header.format_revision = 1;
+       gpu_metrics->common_header.content_revision = 0;
+
+       /* Timestamp of this initialization, taken from the boottime clock */
+       gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
+}