From 25c933b1c4fcfaa65ed735e9782fdb2622f7b7e8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 23 Jul 2020 18:03:35 +0800 Subject: [PATCH] drm/amd/powerplay: add new sysfs interface for retrieving gpu metrics(V2) A new interface for UMD to retrieve gpu metrics data. V2: enrich the documentation Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 57 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 20 +++++++++ drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 ++ 6 files changed, 90 insertions(+) diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index 1711235..0f7679a 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -206,6 +206,12 @@ pp_power_profile_mode .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c :doc: mem_busy_percent +gpu_metrics +~~~~~~~~~~~~~~~~~~~~~ + +.. 
kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: gpu_metrics + GPU Product Information ======================= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index aa27fe6..b190c0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -369,6 +369,9 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_ppfeature_status(\ (adev)->powerplay.pp_handle, (ppfeatures))) +#define amdgpu_dpm_get_gpu_metrics(adev, table) \ + ((adev)->powerplay.pp_funcs->get_gpu_metrics((adev)->powerplay.pp_handle, table)) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 576e3ac..1705e32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -2120,6 +2120,59 @@ static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev, return count; } +/** + * DOC: gpu_metrics + * + * The amdgpu driver provides a sysfs API for retrieving current gpu + * metrics data. The file gpu_metrics is used for this. Reading the + * file will dump all the current gpu metrics data. + * + * These data include temperature, frequency, engine utilization, + * power consumption, throttler status, fan speed and cpu core statistics + * (available for APU only). That is, it will give a snapshot of all sensors + * at the same time. 
+ */ +static ssize_t amdgpu_get_gpu_metrics(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + void *gpu_metrics; + ssize_t size = 0; + int ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + down_read(&adev->reset_sem); + if (is_support_sw_smu(adev)) + size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics); + else if (adev->powerplay.pp_funcs->get_gpu_metrics) + size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics); + up_read(&adev->reset_sem); + + if (size <= 0) + goto out; + + if (size >= PAGE_SIZE) + size = PAGE_SIZE - 1; + + memcpy(buf, gpu_metrics, size); + +out: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; +} + static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC), @@ -2143,6 +2196,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(gpu_metrics, ATTR_FLAG_BASIC), }; static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, @@ -2192,6 +2246,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(pp_features)) { if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(gpu_metrics)) { + if (asic_type < CHIP_VEGA12) + *states = ATTR_STATE_UNSUPPORTED; } if (asic_type == CHIP_ARCTURUS) { diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5f38ee62..0aec28fd 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -322,6 +322,7 @@ struct amd_pm_funcs { int (*asic_reset_mode_2)(void *handle); int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_xgmi_pstate)(void *handle, uint32_t pstate); + ssize_t (*get_gpu_metrics)(void *handle, void **table); }; struct metrics_table_header { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index d03b4852e..f3f50b5 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -2516,3 +2516,23 @@ int smu_get_dpm_clock_table(struct smu_context *smu, return ret; } + +ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + ssize_t size; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->ppt_funcs->get_gpu_metrics) + return -EOPNOTSUPP; + + mutex_lock(&smu->mutex); + + size = smu->ppt_funcs->get_gpu_metrics(smu, table); + + mutex_unlock(&smu->mutex); + + return size; +} diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index b57b104..a08155b 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -589,6 +589,7 @@ struct pptable_funcs { void (*log_thermal_throttling_event)(struct smu_context *smu); size_t (*get_pp_feature_mask)(struct smu_context *smu, char *buf); int (*set_pp_feature_mask)(struct smu_context *smu, uint64_t new_mask); + ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table); }; typedef enum { @@ -791,5 +792,7 @@ int smu_get_dpm_clock_table(struct smu_context *smu, int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); +ssize_t smu_sys_get_gpu_metrics(struct smu_context *smu, void **table); + #endif #endif -- 2.7.4