drm/amd: Introduce `AMDGPU_PP_SENSOR_GPU_INPUT_POWER`
author: Mario Limonciello <mario.limonciello@amd.com>
Thu, 10 Aug 2023 10:31:56 +0000 (05:31 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 15 Aug 2023 22:08:29 +0000 (18:08 -0400)
Some GPUs have been overloading average power values and input power
values. To disambiguate these, introduce a new
`AMDGPU_PP_SENSOR_GPU_INPUT_POWER` sensor and update the GPUs that
share input power to use it instead of average power.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2746
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
20 files changed:
drivers/gpu/drm/amd/include/kgd_pp_interface.h
drivers/gpu/drm/amd/pm/amdgpu_pm.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c

index 9098940..6582cdf 100644 (file)
@@ -133,6 +133,7 @@ enum amd_pp_sensors {
        AMDGPU_PP_SENSOR_VCE_POWER,
        AMDGPU_PP_SENSOR_UVD_POWER,
        AMDGPU_PP_SENSOR_GPU_POWER,
+       AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
        AMDGPU_PP_SENSOR_SS_APU_SHARE,
        AMDGPU_PP_SENSOR_SS_DGPU_SHARE,
        AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
index 816f034..bb42851 100644 (file)
@@ -2809,7 +2809,7 @@ static ssize_t amdgpu_hwmon_show_power_input(struct device *dev,
 {
        unsigned int val;
 
-       val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_POWER);
+       val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER);
        if (val < 0)
                return val;
 
index c91b2a3..5a23714 100644 (file)
@@ -4039,7 +4039,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
                *((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
                *size = 4;
                return 0;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
        case AMDGPU_PP_SENSOR_VDDGFX:
                if ((data->vr_config & VRCONF_VDDGFX_MASK) ==
index 52ae6fa..6d6bc6a 100644 (file)
@@ -3966,7 +3966,7 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
                *((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                ret = vega10_get_gpu_power(hwmgr, (uint32_t *)value);
                break;
        case AMDGPU_PP_SENSOR_VDDGFX:
index 4bd573d..4600679 100644 (file)
@@ -1529,7 +1529,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
                *((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
                if (!ret)
                        *size = 4;
index 492ca33..b6dd7f8 100644 (file)
@@ -2253,7 +2253,7 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
                *((uint32_t *)value) = data->vce_power_gated ? 0 : 1;
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                *size = 16;
                ret = vega20_get_gpu_power(hwmgr, (uint32_t *)value);
                break;
index 6e2069d..6b5e220 100644 (file)
@@ -1398,6 +1398,7 @@ typedef enum {
        METRICS_PCIE_RATE,
        METRICS_PCIE_WIDTH,
        METRICS_CURR_FANPWM,
+       METRICS_CURR_SOCKETPOWER,
 } MetricsMember_t;
 
 enum smu_cmn2asic_mapping_type {
index c49f770..e5f629a 100644 (file)
@@ -1169,6 +1169,7 @@ static int arcturus_read_sensor(struct smu_context *smu,
                ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
        default:
                ret = -EOPNOTSUPP;
                break;
index ca4d97b..4840e4d 100644 (file)
@@ -154,10 +154,14 @@ cyan_skillfish_get_smu_metrics_data(struct smu_context *smu,
        case METRICS_CURR_UCLK:
                *value = metrics->Current.MemclkFrequency;
                break;
-       case METRICS_AVERAGE_SOCKETPOWER:
+       case METRICS_CURR_SOCKETPOWER:
                *value = (metrics->Current.CurrentSocketPower << 8) /
                                1000;
                break;
+       case METRICS_AVERAGE_SOCKETPOWER:
+               *value = (metrics->Average.CurrentSocketPower << 8) /
+                               1000;
+               break;
        case METRICS_TEMPERATURE_EDGE:
                *value = metrics->Current.GfxTemperature / 100 *
                                SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
@@ -214,6 +218,12 @@ static int cyan_skillfish_read_sensor(struct smu_context *smu,
                                                   (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
+               ret = cyan_skillfish_get_smu_metrics_data(smu,
+                                                  METRICS_CURR_SOCKETPOWER,
+                                                  (uint32_t *)data);
+               *size = 4;
+               break;
        case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
                ret = cyan_skillfish_get_smu_metrics_data(smu,
                                                   METRICS_TEMPERATURE_HOTSPOT,
index e655071..06474bb 100644 (file)
@@ -2240,6 +2240,7 @@ static int navi10_read_sensor(struct smu_context *smu,
                ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
        default:
                ret = -EOPNOTSUPP;
                break;
index f0800c0..6dead62 100644 (file)
@@ -1962,6 +1962,7 @@ static int sienna_cichlid_read_sensor(struct smu_context *smu,
                        ret = -EOPNOTSUPP;
                }
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
        default:
                ret = -EOPNOTSUPP;
                break;
index 185d0b5..8a2c9c7 100644 (file)
@@ -390,6 +390,10 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu,
                *value = metrics->Current.UvdActivity;
                break;
        case METRICS_AVERAGE_SOCKETPOWER:
+               *value = (metrics->Average.CurrentSocketPower << 8) /
+               1000;
+               break;
+       case METRICS_CURR_SOCKETPOWER:
                *value = (metrics->Current.CurrentSocketPower << 8) /
                1000;
                break;
@@ -1542,6 +1546,12 @@ static int vangogh_read_sensor(struct smu_context *smu,
                                                   (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
+               ret = vangogh_common_get_smu_metrics_data(smu,
+                                                  METRICS_CURR_SOCKETPOWER,
+                                                  (uint32_t *)data);
+               *size = 4;
+               break;
        case AMDGPU_PP_SENSOR_EDGE_TEMP:
                ret = vangogh_common_get_smu_metrics_data(smu,
                                                   METRICS_TEMPERATURE_EDGE,
index a756935..7b5ccb9 100644 (file)
@@ -1197,7 +1197,7 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu,
        case METRICS_AVERAGE_VCNACTIVITY:
                *value = metrics->AverageUvdActivity / 100;
                break;
-       case METRICS_AVERAGE_SOCKETPOWER:
+       case METRICS_CURR_SOCKETPOWER:
                if (((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) && (adev->pm.fw_version >= 0x40000f)) ||
                ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0)) && (adev->pm.fw_version >= 0x373200)))
                        *value = metrics->CurrentSocketPower << 8;
@@ -1297,9 +1297,9 @@ static int renoir_read_sensor(struct smu_context *smu,
                                                  (uint32_t *)data);
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                ret = renoir_get_smu_metrics_data(smu,
-                                                 METRICS_AVERAGE_SOCKETPOWER,
+                                                 METRICS_CURR_SOCKETPOWER,
                                                  (uint32_t *)data);
                *size = 4;
                break;
index 8f26123..4e5043b 100644 (file)
@@ -1183,6 +1183,7 @@ static int aldebaran_read_sensor(struct smu_context *smu,
                ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
        default:
                ret = -EOPNOTSUPP;
                break;
index 48b0352..203dc50 100644 (file)
@@ -997,6 +997,7 @@ static int smu_v13_0_0_read_sensor(struct smu_context *smu,
                                                       (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
        default:
                ret = -EOPNOTSUPP;
                break;
index ef37dda..a4e8798 100644 (file)
@@ -321,6 +321,9 @@ static int smu_v13_0_4_get_smu_metrics_data(struct smu_context *smu,
                *value = metrics->UvdActivity;
                break;
        case METRICS_AVERAGE_SOCKETPOWER:
+               *value = (metrics->AverageSocketPower << 8) / 1000;
+               break;
+       case METRICS_CURR_SOCKETPOWER:
                *value = (metrics->CurrentSocketPower << 8) / 1000;
                break;
        case METRICS_TEMPERATURE_EDGE:
@@ -575,6 +578,12 @@ static int smu_v13_0_4_read_sensor(struct smu_context *smu,
                                                       (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
+               ret = smu_v13_0_4_get_smu_metrics_data(smu,
+                                                      METRICS_CURR_SOCKETPOWER,
+                                                      (uint32_t *)data);
+               *size = 4;
+               break;
        case AMDGPU_PP_SENSOR_EDGE_TEMP:
                ret = smu_v13_0_4_get_smu_metrics_data(smu,
                                                       METRICS_TEMPERATURE_EDGE,
index 87a79e6..5f8e70d 100644 (file)
@@ -288,7 +288,7 @@ static int smu_v13_0_5_get_smu_metrics_data(struct smu_context *smu,
        case METRICS_AVERAGE_VCNACTIVITY:
                *value = metrics->UvdActivity;
                break;
-       case METRICS_AVERAGE_SOCKETPOWER:
+       case METRICS_CURR_SOCKETPOWER:
                *value = (metrics->CurrentSocketPower << 8) / 1000;
                break;
        case METRICS_TEMPERATURE_EDGE:
@@ -332,9 +332,9 @@ static int smu_v13_0_5_read_sensor(struct smu_context *smu,
                                                                (uint32_t *)data);
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                ret = smu_v13_0_5_get_smu_metrics_data(smu,
-                                                               METRICS_AVERAGE_SOCKETPOWER,
+                                                               METRICS_CURR_SOCKETPOWER,
                                                                (uint32_t *)data);
                *size = 4;
                break;
index 362acbb..aedf1c4 100644 (file)
@@ -714,7 +714,7 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
        case METRICS_AVERAGE_MEMACTIVITY:
                *value = SMUQ10_TO_UINT(metrics->DramBandwidthUtilization);
                break;
-       case METRICS_AVERAGE_SOCKETPOWER:
+       case METRICS_CURR_SOCKETPOWER:
                *value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
                break;
        case METRICS_TEMPERATURE_HOTSPOT:
@@ -1139,15 +1139,6 @@ static int smu_v13_0_6_get_current_activity_percent(struct smu_context *smu,
        return ret;
 }
 
-static int smu_v13_0_6_get_gpu_power(struct smu_context *smu, uint32_t *value)
-{
-       if (!value)
-               return -EINVAL;
-
-       return smu_v13_0_6_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER,
-                                              value);
-}
-
 static int smu_v13_0_6_thermal_get_temperature(struct smu_context *smu,
                                               enum amd_pp_sensors sensor,
                                               uint32_t *value)
@@ -1193,8 +1184,10 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu,
                                                               (uint32_t *)data);
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
-               ret = smu_v13_0_6_get_gpu_power(smu, (uint32_t *)data);
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
+               ret = smu_v13_0_6_get_smu_metrics_data(smu,
+                                                      METRICS_CURR_SOCKETPOWER,
+                                                      (uint32_t *)data);
                *size = 4;
                break;
        case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
index 690f89f..880a83e 100644 (file)
@@ -978,6 +978,7 @@ static int smu_v13_0_7_read_sensor(struct smu_context *smu,
                                                       (uint32_t *)data);
                *size = 4;
                break;
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
        default:
                ret = -EOPNOTSUPP;
                break;
index a1be202..14fe45d 100644 (file)
@@ -365,7 +365,7 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu,
        case METRICS_AVERAGE_VCNACTIVITY:
                *value = metrics->UvdActivity;
                break;
-       case METRICS_AVERAGE_SOCKETPOWER:
+       case METRICS_CURR_SOCKETPOWER:
                *value = (metrics->CurrentSocketPower << 8) / 1000;
                break;
        case METRICS_TEMPERATURE_EDGE:
@@ -423,9 +423,9 @@ static int yellow_carp_read_sensor(struct smu_context *smu,
                                                                (uint32_t *)data);
                *size = 4;
                break;
-       case AMDGPU_PP_SENSOR_GPU_POWER:
+       case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
                ret = yellow_carp_get_smu_metrics_data(smu,
-                                                               METRICS_AVERAGE_SOCKETPOWER,
+                                                               METRICS_CURR_SOCKETPOWER,
                                                                (uint32_t *)data);
                *size = 4;
                break;