Merge branches 'pm-pci', 'pm-sleep', 'pm-domains' and 'powercap'

author Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Mon, 30 Aug 2021 17:25:42 +0000 (19:25 +0200)

committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Mon, 30 Aug 2021 17:25:42 +0000 (19:25 +0200)
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 30 Aug 2021 17:25:42 +0000 (19:25 +0200)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 30 Aug 2021 17:25:42 +0000 (19:25 +0200)
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c

index 7e74504..b496128 100644 (file)
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -163,9 +163,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
         if (ret || val > 1)
                 return -EINVAL;
  
-       get_online_cpus();
+       cpus_read_lock();
         set_boost(policy, val);
-       put_online_cpus();
+       cpus_read_unlock();
  
         return count;
  }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

index 45f3416..06c526d 100644 (file)
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2654,18 +2654,18 @@ int cpufreq_boost_trigger_state(int state)
         cpufreq_driver->boost_enabled = state;
         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
  
-       get_online_cpus();
+       cpus_read_lock();
         for_each_active_policy(policy) {
                 ret = cpufreq_driver->set_boost(policy, state);
                 if (ret)
                         goto err_reset_state;
         }
-       put_online_cpus();
+       cpus_read_unlock();
  
         return 0;
  
  err_reset_state:
-       put_online_cpus();
+       cpus_read_unlock();
  
         write_lock_irqsave(&cpufreq_driver_lock, flags);
         cpufreq_driver->boost_enabled = !state;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c

index ac361a8..eb4320b 100644 (file)
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -418,7 +418,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
         default_powersave_bias = powersave_bias;
         cpumask_clear(&done);
  
-       get_online_cpus();
+       cpus_read_lock();
         for_each_online_cpu(cpu) {
                 struct cpufreq_policy *policy;
                 struct policy_dbs_info *policy_dbs;
@@ -442,7 +442,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
                 od_tuners = dbs_data->tuners;
                 od_tuners->powersave_bias = default_powersave_bias;
         }
-       put_online_cpus();
+       cpus_read_unlock();
  }
  
  void od_register_powersave_bias_handler(unsigned int (*f)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c

index bb45499..b4ffe6c 100644 (file)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,6 +32,7 @@
  #include <asm/cpu_device_id.h>
  #include <asm/cpufeature.h>
  #include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
  
  #define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
  
@@ -219,6 +220,7 @@ struct global_params {
   * @sched_flags:       Store scheduler flags for possible cross CPU update
   * @hwp_boost_min:     Last HWP boosted min performance
   * @suspended:         Whether or not the driver has been suspended.
+ * @hwp_notify_work:   Delayed work item for handling HWP notifications.
   *
   * This structure stores per CPU instance data for all CPUs.
   */
@@ -257,6 +259,7 @@ struct cpudata {
         unsigned int sched_flags;
         u32 hwp_boost_min;
         bool suspended;
+       struct delayed_work hwp_notify_work;
  };
  
  static struct cpudata **all_cpu_data;
@@ -1625,6 +1628,40 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
  
  /************************** sysfs end ************************/
  
+static void intel_pstate_notify_work(struct work_struct *work)
+{
+       mutex_lock(&intel_pstate_driver_lock);
+       cpufreq_update_policy(smp_processor_id());
+       wrmsrl(MSR_HWP_STATUS, 0);
+       mutex_unlock(&intel_pstate_driver_lock);
+}
+
+void notify_hwp_interrupt(void)
+{
+       unsigned int this_cpu = smp_processor_id();
+       struct cpudata *cpudata;
+       u64 value;
+
+       if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+               return;
+
+       rdmsrl(MSR_HWP_STATUS, value);
+       if (!(value & 0x01))
+               return;
+
+       cpudata = all_cpu_data[this_cpu];
+       schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
+}
+
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+       /* Enable HWP notification interrupt for guaranteed performance change */
+       if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+               INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+               wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+       }
+}
+
  static void intel_pstate_hwp_enable(struct cpudata *cpudata)
  {
         /* First disable HWP notification interrupt as we don't process them */
@@ -1634,6 +1671,8 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
         wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
         if (cpudata->epp_default == -EINVAL)
                 cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+       intel_pstate_enable_hwp_interrupt(cpudata);
  }
  
  static int atom_get_min_pstate(void)
@@ -2969,7 +3008,7 @@ static void intel_pstate_driver_cleanup(void)
  {
         unsigned int cpu;
  
-       get_online_cpus();
+       cpus_read_lock();
         for_each_online_cpu(cpu) {
                 if (all_cpu_data[cpu]) {
                         if (intel_pstate_driver == &intel_pstate)
@@ -2979,7 +3018,7 @@ static void intel_pstate_driver_cleanup(void)
                         all_cpu_data[cpu] = NULL;
                 }
         }
-       put_online_cpus();
+       cpus_read_unlock();
  
         intel_pstate_driver = NULL;
  }
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c

index b9ccb6a..12ab401 100644 (file)
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1180,7 +1180,7 @@ static int powernowk8_init(void)
         if (!x86_match_cpu(powernow_k8_ids))
                 return -ENODEV;
  
-       get_online_cpus();
+       cpus_read_lock();
         for_each_online_cpu(i) {
                 smp_call_function_single(i, check_supported_cpu, &ret, 1);
                 if (!ret)
@@ -1188,10 +1188,10 @@ static int powernowk8_init(void)
         }
  
         if (supported_cpus != num_online_cpus()) {
-               put_online_cpus();
+               cpus_read_unlock();
                 return -ENODEV;
         }
-       put_online_cpus();
+       cpus_read_unlock();
  
         ret = cpufreq_register_driver(&cpufreq_amd64_driver);
         if (ret)
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c

index 005600c..23a06cb 100644 (file)
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -918,7 +918,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
         unsigned int cpu;
         cpumask_t mask;
  
-       get_online_cpus();
+       cpus_read_lock();
         cpumask_and(&mask, &chip->mask, cpu_online_mask);
         smp_call_function_any(&mask,
                               powernv_cpufreq_throttle_check, NULL, 0);
@@ -939,7 +939,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
                 cpufreq_cpu_put(policy);
         }
  out:
-       put_online_cpus();
+       cpus_read_unlock();
  }
  
  static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
diff --git a/drivers/opp/core.c b/drivers/opp/core.c

index 5543c54..04b4691 100644 (file)
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -893,6 +893,10 @@ static int _set_required_opps(struct device *dev,
         if (!required_opp_tables)
                 return 0;
  
+       /* required-opps not fully initialized yet */
+       if (lazy_linking_pending(opp_table))
+               return -EBUSY;
+
         /*
          * We only support genpd's OPPs in the "required-opps" for now, as we
          * don't know much about other use cases. Error out if the required OPP
@@ -903,10 +907,6 @@ static int _set_required_opps(struct device *dev,
                 return -ENOENT;
         }
  
-       /* required-opps not fully initialized yet */
-       if (lazy_linking_pending(opp_table))
-               return -EBUSY;
-
         /* Single genpd case */
         if (!genpd_virt_devs)
                 return _set_required_opp(dev, dev, opp, 0);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c

index aacf575..a5e6759 100644 (file)
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1906,11 +1906,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
          * so that things like MSI message writing will behave as expected
          * (e.g. if the device really is in D0 at enable time).
          */
-       if (dev->pm_cap) {
-               u16 pmcsr;
-               pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
-               dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
-       }
+       pci_update_current_state(dev, dev->current_state);
  
         if (atomic_inc_return(&dev->enable_cnt) > 1)
                 return 0;               /* already enabled */
@@ -2495,7 +2491,14 @@ static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable
         if (enable) {
                 int error;
  
-               if (pci_pme_capable(dev, state))
+               /*
+                * Enable PME signaling if the device can signal PME from
+                * D3cold regardless of whether or not it can signal PME from
+                * the current target state, because that will allow it to
+                * signal PME when the hierarchy above it goes into D3cold and
+                * the device itself ends up in D3cold as a result of that.
+                */
+               if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold))
                         pci_pme_active(dev, true);
                 else
                         ret = 1;
@@ -2599,16 +2602,20 @@ static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup)
         if (dev->current_state == PCI_D3cold)
                 target_state = PCI_D3cold;
  
-       if (wakeup) {
+       if (wakeup && dev->pme_support) {
+               pci_power_t state = target_state;
+
                 /*
                  * Find the deepest state from which the device can generate
                  * PME#.
                  */
-               if (dev->pme_support) {
-                       while (target_state
-                             && !(dev->pme_support & (1 << target_state)))
-                               target_state--;
-               }
+               while (state && !(dev->pme_support & (1 << state)))
+                       state--;
+
+               if (state)
+                       return state;
+               else if (dev->pme_support & 1)
+                       return PCI_D0;
         }
  
         return target_state;
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c

index 73cf68a..7c0099e 100644 (file)
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -158,16 +158,16 @@ static int get_energy_counter(struct powercap_zone *power_zone,
         /* prevent CPU hotplug, make sure the RAPL domain does not go
          * away while reading the counter.
          */
-       get_online_cpus();
+       cpus_read_lock();
         rd = power_zone_to_rapl_domain(power_zone);
  
         if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
                 *energy_raw = energy_now;
-               put_online_cpus();
+               cpus_read_unlock();
  
                 return 0;
         }
-       put_online_cpus();
+       cpus_read_unlock();
  
         return -EIO;
  }
@@ -216,11 +216,11 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
         if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
                 return -EACCES;
  
-       get_online_cpus();
+       cpus_read_lock();
         rapl_write_data_raw(rd, PL1_ENABLE, mode);
         if (rapl_defaults->set_floor_freq)
                 rapl_defaults->set_floor_freq(rd, mode);
-       put_online_cpus();
+       cpus_read_unlock();
  
         return 0;
  }
@@ -234,13 +234,13 @@ static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
                 *mode = false;
                 return 0;
         }
-       get_online_cpus();
+       cpus_read_lock();
         if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
-               put_online_cpus();
+               cpus_read_unlock();
                 return -EIO;
         }
         *mode = val;
-       put_online_cpus();
+       cpus_read_unlock();
  
         return 0;
  }
@@ -317,7 +317,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
         int ret = 0;
         int id;
  
-       get_online_cpus();
+       cpus_read_lock();
         rd = power_zone_to_rapl_domain(power_zone);
         id = contraint_to_pl(rd, cid);
         if (id < 0) {
@@ -350,7 +350,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
         if (!ret)
                 package_power_limit_irq_save(rp);
  set_exit:
-       put_online_cpus();
+       cpus_read_unlock();
         return ret;
  }
  
@@ -363,7 +363,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
         int ret = 0;
         int id;
  
-       get_online_cpus();
+       cpus_read_lock();
         rd = power_zone_to_rapl_domain(power_zone);
         id = contraint_to_pl(rd, cid);
         if (id < 0) {
@@ -382,7 +382,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
                 prim = POWER_LIMIT4;
                 break;
         default:
-               put_online_cpus();
+               cpus_read_unlock();
                 return -EINVAL;
         }
         if (rapl_read_data_raw(rd, prim, true, &val))
@@ -391,7 +391,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
                 *data = val;
  
  get_exit:
-       put_online_cpus();
+       cpus_read_unlock();
  
         return ret;
  }
@@ -403,7 +403,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
         int ret = 0;
         int id;
  
-       get_online_cpus();
+       cpus_read_lock();
         rd = power_zone_to_rapl_domain(power_zone);
         id = contraint_to_pl(rd, cid);
         if (id < 0) {
@@ -423,7 +423,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
         }
  
  set_time_exit:
-       put_online_cpus();
+       cpus_read_unlock();
         return ret;
  }
  
@@ -435,7 +435,7 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
         int ret = 0;
         int id;
  
-       get_online_cpus();
+       cpus_read_lock();
         rd = power_zone_to_rapl_domain(power_zone);
         id = contraint_to_pl(rd, cid);
         if (id < 0) {
@@ -458,14 +458,14 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
                 val = 0;
                 break;
         default:
-               put_online_cpus();
+               cpus_read_unlock();
                 return -EINVAL;
         }
         if (!ret)
                 *data = val;
  
  get_time_exit:
-       put_online_cpus();
+       cpus_read_unlock();
  
         return ret;
  }
@@ -491,7 +491,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
         int prim;
         int ret = 0;
  
-       get_online_cpus();
+       cpus_read_lock();
         rd = power_zone_to_rapl_domain(power_zone);
         switch (rd->rpl[id].prim_id) {
         case PL1_ENABLE:
@@ -504,7 +504,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
                 prim = MAX_POWER;
                 break;
         default:
-               put_online_cpus();
+               cpus_read_unlock();
                 return -EINVAL;
         }
         if (rapl_read_data_raw(rd, prim, true, &val))
@@ -516,7 +516,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
         if (rd->rpl[id].prim_id == PL4_ENABLE)
                 *data = *data * 2;
  
-       put_online_cpus();
+       cpus_read_unlock();
  
         return ret;
  }
@@ -1358,7 +1358,7 @@ static void power_limit_state_save(void)
         struct rapl_domain *rd;
         int nr_pl, ret, i;
  
-       get_online_cpus();
+       cpus_read_lock();
         list_for_each_entry(rp, &rapl_packages, plist) {
                 if (!rp->power_zone)
                         continue;
@@ -1390,7 +1390,7 @@ static void power_limit_state_save(void)
                         }
                 }
         }
-       put_online_cpus();
+       cpus_read_unlock();
  }
  
  static void power_limit_state_restore(void)
@@ -1399,7 +1399,7 @@ static void power_limit_state_restore(void)
         struct rapl_domain *rd;
         int nr_pl, i;
  
-       get_online_cpus();
+       cpus_read_lock();
         list_for_each_entry(rp, &rapl_packages, plist) {
                 if (!rp->power_zone)
                         continue;
@@ -1425,7 +1425,7 @@ static void power_limit_state_restore(void)
                         }
                 }
         }
-       put_online_cpus();
+       cpus_read_unlock();
  }
  
  static int rapl_pm_callback(struct notifier_block *nb,
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c

index cc3b228..1be45f3 100644 (file)
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -138,6 +138,8 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
  /* List of verified CPUs. */
  static const struct x86_cpu_id pl4_support_ids[] = {
         { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
         {}
  };
  
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c

index 99abdc0..dab7e8f 100644 (file)
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -569,13 +569,18 @@ static void notify_thresholds(__u64 msr_val)
                 platform_thermal_notify(msr_val);
  }
  
+void __weak notify_hwp_interrupt(void)
+{
+       wrmsrl_safe(MSR_HWP_STATUS, 0);
+}
+
  /* Thermal transition interrupt handler */
  void intel_thermal_interrupt(void)
  {
         __u64 msr_val;
  
         if (static_cpu_has(X86_FEATURE_HWP))
-               wrmsrl_safe(MSR_HWP_STATUS, 0);
+               notify_hwp_interrupt();
  
         rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
  
diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h

index 53f427b..01e7bed 100644 (file)
--- a/drivers/thermal/intel/thermal_interrupt.h
+++ b/drivers/thermal/intel/thermal_interrupt.h
@@ -12,4 +12,7 @@ extern int (*platform_thermal_notify)(__u64 msr_val);
   * callback has rate control */
  extern bool (*platform_thermal_package_rate_control)(void);
  
+/* Handle HWP interrupt */
+extern void notify_hwp_interrupt(void);
+
  #endif /* _INTEL_THERMAL_INTERRUPT_H */
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h

index 3f221db..1834752 100644 (file)
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -53,6 +53,22 @@ struct em_perf_domain {
  #ifdef CONFIG_ENERGY_MODEL
  #define EM_MAX_POWER 0xFFFF
  
+/*
+ * Increase resolution of energy estimation calculations for 64-bit
+ * architectures. The extra resolution improves decision made by EAS for the
+ * task placement when two Performance Domains might provide similar energy
+ * estimation values (w/o better resolution the values could be equal).
+ *
+ * We increase resolution only if we have enough bits to allow this increased
+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
+ * are pretty high and the returns do not justify the increased costs.
+ */
+#ifdef CONFIG_64BIT
+#define em_scale_power(p) ((p) * 1000)
+#else
+#define em_scale_power(p) (p)
+#endif
+
  struct em_data_callback {
         /**
          * active_power() - Provide power at the next performance state of
diff --git a/include/linux/notifier.h b/include/linux/notifier.h

index 2fb373a..87069b8 100644 (file)
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -168,8 +168,6 @@ extern int raw_notifier_call_chain(struct raw_notifier_head *nh,
  extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
                 unsigned long val, void *v);
  
-extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
-               unsigned long val_up, unsigned long val_down, void *v);
  extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh,
                 unsigned long val_up, unsigned long val_down, void *v);
  extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c

index f7e1d0e..246efc7 100644 (file)
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -13,19 +13,32 @@
  #include <linux/spinlock.h>
  #include <linux/syscore_ops.h>
  
-static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+/*
+ * atomic_notifiers use a spinlock_t, which can block under PREEMPT_RT.
+ * Notifications for cpu_pm will be issued by the idle task itself, which can
+ * never block, IOW it requires using a raw_spinlock_t.
+ */
+static struct {
+       struct raw_notifier_head chain;
+       raw_spinlock_t lock;
+} cpu_pm_notifier = {
+       .chain = RAW_NOTIFIER_INIT(cpu_pm_notifier.chain),
+       .lock  = __RAW_SPIN_LOCK_UNLOCKED(cpu_pm_notifier.lock),
+};
  
  static int cpu_pm_notify(enum cpu_pm_event event)
  {
         int ret;
  
         /*
-        * atomic_notifier_call_chain has a RCU read critical section, which
-        * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
-        * RCU know this.
+        * This introduces an RCU read critical section, which could be
+        * dysfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
+        * this.
          */
         rcu_irq_enter_irqson();
-       ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);
+       rcu_read_lock();
+       ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
+       rcu_read_unlock();
         rcu_irq_exit_irqson();
  
         return notifier_to_errno(ret);
@@ -33,10 +46,13 @@ static int cpu_pm_notify(enum cpu_pm_event event)
  
  static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down)
  {
+       unsigned long flags;
         int ret;
  
         rcu_irq_enter_irqson();
-       ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);
+       raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+       ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
+       raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
         rcu_irq_exit_irqson();
  
         return notifier_to_errno(ret);
@@ -49,12 +65,17 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
   * Add a driver to a list of drivers that are notified about
   * CPU and CPU cluster low power entry and exit.
   *
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_register.
+ * This function has the same return conditions as raw_notifier_chain_register.
   */
  int cpu_pm_register_notifier(struct notifier_block *nb)
  {
-       return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+       unsigned long flags;
+       int ret;
+
+       raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+       ret = raw_notifier_chain_register(&cpu_pm_notifier.chain, nb);
+       raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+       return ret;
  }
  EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
  
@@ -64,12 +85,17 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
   *
   * Remove a driver from the CPU PM notifier list.
   *
- * This function may sleep, and has the same return conditions as
- * raw_notifier_chain_unregister.
+ * This function has the same return conditions as raw_notifier_chain_unregister.
   */
  int cpu_pm_unregister_notifier(struct notifier_block *nb)
  {
-       return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+       unsigned long flags;
+       int ret;
+
+       raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
+       ret = raw_notifier_chain_unregister(&cpu_pm_notifier.chain, nb);
+       raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
+       return ret;
  }
  EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  
diff --git a/kernel/notifier.c b/kernel/notifier.c

index 1b019cb..b8251dc 100644 (file)
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -172,25 +172,6 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
  }
  EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
  
-int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
-               unsigned long val_up, unsigned long val_down, void *v)
-{
-       unsigned long flags;
-       int ret;
-
-       /*
-        * Musn't use RCU; because then the notifier list can
-        * change between the up and down traversal.
-        */
-       spin_lock_irqsave(&nh->lock, flags);
-       ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
-       spin_unlock_irqrestore(&nh->lock, flags);
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust);
-NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust);
-
  /**
   *     atomic_notifier_call_chain - Call functions in an atomic notifier chain
   *     @nh: Pointer to head of the atomic notifier chain
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c

index 0f4530b..a332ccd 100644 (file)
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -170,7 +170,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
         /* Compute the cost of each performance state. */
         fmax = (u64) table[nr_states - 1].frequency;
         for (i = 0; i < nr_states; i++) {
-               table[i].cost = div64_u64(fmax * table[i].power,
+               unsigned long power_res = em_scale_power(table[i].power);
+
+               table[i].cost = div64_u64(fmax * power_res,
                                           table[i].frequency);
         }
  
diff --git a/kernel/power/main.c b/kernel/power/main.c

index 12c7e1b..44169f3 100644 (file)
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -577,7 +577,7 @@ static inline void pm_print_times_init(void) {}
  
  struct kobject *power_kobj;
  
-/**
+/*
   * state - control system sleep states.
   *
   * show() returns available sleep state labels, which may be "mem", "standby",
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c

index d8cae43..eb75f39 100644 (file)
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -96,7 +96,7 @@ static void s2idle_enter(void)
         s2idle_state = S2IDLE_STATE_ENTER;
         raw_spin_unlock_irq(&s2idle_lock);
  
-       get_online_cpus();
+       cpus_read_lock();
         cpuidle_resume();
  
         /* Push all the CPUs into the idle loop. */
@@ -106,7 +106,7 @@ static void s2idle_enter(void)
                     s2idle_state == S2IDLE_STATE_WAKE);
  
         cpuidle_pause();
-       put_online_cpus();
+       cpus_read_unlock();
  
         raw_spin_lock_irq(&s2idle_lock);
  
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c

index e1ed58a..d20526c 100644 (file)
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -129,7 +129,7 @@ static int __init has_wakealarm(struct device *dev, const void *data)
  {
         struct rtc_device *candidate = to_rtc_device(dev);
  
-       if (!candidate->ops->set_alarm)
+       if (!test_bit(RTC_FEATURE_ALARM, candidate->features))
                 return 0;
         if (!device_may_wakeup(candidate->dev.parent))
                 return 0;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c

index 5712461..e7af188 100644 (file)
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -537,9 +537,17 @@ static struct attribute *sugov_attrs[] = {
  };
  ATTRIBUTE_GROUPS(sugov);
  
+static void sugov_tunables_free(struct kobject *kobj)
+{
+       struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+
+       kfree(to_sugov_tunables(attr_set));
+}
+
  static struct kobj_type sugov_tunables_ktype = {
         .default_groups = sugov_groups,
         .sysfs_ops = &governor_sysfs_ops,
+       .release = &sugov_tunables_free,
  };
  
  /********************** cpufreq governor interface *********************/
@@ -639,12 +647,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_polic
         return tunables;
  }
  
-static void sugov_tunables_free(struct sugov_tunables *tunables)
+static void sugov_clear_global_tunables(void)
  {
         if (!have_governor_per_policy())
                 global_tunables = NULL;
-
-       kfree(tunables);
  }
  
  static int sugov_init(struct cpufreq_policy *policy)
@@ -707,7 +713,7 @@ out:
  fail:
         kobject_put(&tunables->attr_set.kobj);
         policy->governor_data = NULL;
-       sugov_tunables_free(tunables);
+       sugov_clear_global_tunables();
  
  stop_kthread:
         sugov_kthread_stop(sg_policy);
@@ -734,7 +740,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
         count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
         policy->governor_data = NULL;
         if (!count)
-               sugov_tunables_free(tunables);
+               sugov_clear_global_tunables();
  
         mutex_unlock(&global_tunables_lock);
author	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
	Mon, 30 Aug 2021 17:25:42 +0000 (19:25 +0200)
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
	Mon, 30 Aug 2021 17:25:42 +0000 (19:25 +0200)
drivers/cpufreq/acpi-cpufreq.c		patch | blob | history
drivers/cpufreq/cpufreq.c		patch | blob | history
drivers/cpufreq/cpufreq_ondemand.c		patch | blob | history
drivers/cpufreq/intel_pstate.c		patch | blob | history
drivers/cpufreq/powernow-k8.c		patch | blob | history
drivers/cpufreq/powernv-cpufreq.c		patch | blob | history
drivers/opp/core.c		patch | blob | history
drivers/pci/pci.c		patch | blob | history
drivers/powercap/intel_rapl_common.c		patch | blob | history
drivers/powercap/intel_rapl_msr.c		patch | blob | history
drivers/thermal/intel/therm_throt.c		patch | blob | history
drivers/thermal/intel/thermal_interrupt.h		patch | blob | history
include/linux/energy_model.h		patch | blob | history
include/linux/notifier.h		patch | blob | history
kernel/cpu_pm.c		patch | blob | history
kernel/notifier.c		patch | blob | history
kernel/power/energy_model.c		patch | blob | history
kernel/power/main.c		patch | blob | history
kernel/power/suspend.c		patch | blob | history
kernel/power/suspend_test.c		patch | blob | history
kernel/sched/cpufreq_schedutil.c		patch | blob | history