thermal: intel: pch: improve the cooling delay log
author: Zhang Rui <rui.zhang@intel.com>
Thu, 19 May 2022 14:35:08 +0000 (22:35 +0800)
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 19 May 2022 17:40:25 +0000 (19:40 +0200)
Previously, during suspend, the intel_pch_thermal driver logged a message
for every cooling iteration, reporting the current PCH temperature and the
number of cooling iterations tried so far, as shown below:

[  100.955526] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 1 times for 100 ms duration
[  101.064156] intel_pch_thermal 0000:00:14.2: CPU-PCH current temp [53C] higher than the threshold temp [50C], sleep 2 times for 100 ms duration

After changing the default delay_cnt to 600, in practice, it is common to
see tens of the above messages if the system is suspended when PCH
overheats. Thus, change this log message from dev_warn to dev_dbg because
it is only useful when we want to check the temperature trend.

At the same time, with the patch applied, the driver always prints a
one-line message, which is one of the following four possibilities.

1. PCH is cool, no cooling delay needed
[ 1791.902853] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [48C]

2. PCH overheats and becomes cool after the cooling delays
[ 1475.511617] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C] after 30700 ms delay

3. PCH still overheats after the overall cooling timeout
[ 2250.157487] intel_pch_thermal 0000:00:12.0: CPU-PCH is hot [60C] after 60000 ms delay. S0ix might fail

4. Cooling is aborted because a wakeup event is detected during the delay
[ 1933.639509] intel_pch_thermal 0000:00:12.0: Wakeup event detected, abort cooling

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/thermal/intel/intel_pch_thermal.c

index b7b32e2..c1fa2b2 100644 (file)
@@ -197,7 +197,7 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
 static int pch_wpt_suspend(struct pch_thermal_device *ptd)
 {
        u8 tsel;
-       int pch_delay_cnt = 1;
+       int pch_delay_cnt = 0;
        u16 pch_thr_temp, pch_cur_temp;
 
        /* Shutdown the thermal sensor if it is not enabled by BIOS */
@@ -233,29 +233,38 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd)
         * temperature stays above threshold, notify the warning message
         * which helps to indentify the reason why S0ix entry was rejected.
         */
-       while (pch_delay_cnt <= delay_cnt) {
+       while (pch_delay_cnt < delay_cnt) {
                if (pch_cur_temp < pch_thr_temp)
                        break;
 
-               if (pm_wakeup_pending())
-                       break;
+               if (pm_wakeup_pending()) {
+                       dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n");
+                       return 0;
+               }
 
-               dev_warn(&ptd->pdev->dev,
+               pch_delay_cnt++;
+               dev_dbg(&ptd->pdev->dev,
                        "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n",
                        pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout);
                msleep(delay_timeout);
                /* Read the PCH current temperature for next cycle. */
                pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
-               pch_delay_cnt++;
        }
 
        if (pch_cur_temp >= pch_thr_temp)
                dev_warn(&ptd->pdev->dev,
-                       "CPU-PCH is hot [%dC] even after delay, continue to suspend. S0ix might fail\n",
-                       pch_cur_temp);
-       else
-               dev_info(&ptd->pdev->dev,
-                       "CPU-PCH is cool [%dC], continue to suspend\n", pch_cur_temp);
+                       "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n",
+                       pch_cur_temp, pch_delay_cnt * delay_timeout);
+       else {
+               if (pch_delay_cnt)
+                       dev_info(&ptd->pdev->dev,
+                               "CPU-PCH is cool [%dC] after %d ms delay\n",
+                               pch_cur_temp, pch_delay_cnt * delay_timeout);
+               else
+                       dev_info(&ptd->pdev->dev,
+                               "CPU-PCH is cool [%dC]\n",
+                               pch_cur_temp);
+       }
 
        return 0;
 }